[X86] Prefer unpckhpd over movhlps in isel for fake unary cases
In r337348, I changed lowering to prefer X86ISD::UNPCKL/UNPCKH opcodes over MOVLHPS/MOVHLPS for v2f64 {0,0} and {1,1} shuffles when we have SSE2. This enabled the removal of a bunch of weirdly bitcasted isel patterns in r337349. To avoid changing the tests I placed a gross hack in isel to still emit movhlps instructions for fake unary unpckh nodes. A similar hack was not needed for unpckl and movlhps because we do execution domain switching for those. But unpckh and movhlps have swapped operand order.
This patch removes the hack.
This is a code size increase since unpckhpd requires a 0x66 prefix and movhlps does not. But if that's a big concern we should be using movhlps for all unpckhpd opcodes and let commuteInstruction turn it into unpckhpd when it's an advantage.
Differential Revision: https://reviews.llvm.org/D49499
llvm-svn: 341973
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
index 411acd8..a4b255c 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
@@ -50,7 +50,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -62,7 +62,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -101,7 +101,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -110,7 +110,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -122,7 +122,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -130,7 +130,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -187,7 +187,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -196,7 +196,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -205,7 +205,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -214,7 +214,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
@@ -226,7 +226,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -234,7 +234,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -242,7 +242,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -250,7 +250,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm4, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
@@ -371,7 +371,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -382,7 +382,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -418,7 +418,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -427,7 +427,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -438,7 +438,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -446,7 +446,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -500,7 +500,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
@@ -509,7 +509,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -518,7 +518,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -527,7 +527,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -538,7 +538,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
@@ -546,7 +546,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
-; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -554,7 +554,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -562,7 +562,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -679,7 +679,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -690,7 +690,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -726,7 +726,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -735,7 +735,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -746,7 +746,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -754,7 +754,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -808,7 +808,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
@@ -817,7 +817,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -826,7 +826,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -835,7 +835,7 @@
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -846,7 +846,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
@@ -854,7 +854,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
-; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -862,7 +862,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -870,7 +870,7 @@
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -1182,8 +1182,8 @@
define double @test_v2f64_one(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_one:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -1206,8 +1206,8 @@
define double @test_v4f64_one(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_one:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm2
; SSE-NEXT: mulsd %xmm1, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
@@ -1243,8 +1243,8 @@
define double @test_v8f64_one(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_one:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE-NEXT: movapd %xmm0, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm4
; SSE-NEXT: mulsd %xmm1, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]