X86: Custom lower <2 x i64> eq and ne when SSE41 is not available. The sequence pcmpeqd, pshufd, pshufd, pand is cheaper than unpack + cmpq, sbbq, cmpq, sbbq + pack. Small speedup on loop-vectorized viterbi (-march=core2). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171063 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 382ed78d3fef9f6c582e3cdcfb30f8c6fa3d0d79 [log] [tgz]
author: Benjamin Kramer <benny.kra@googlemail.com> Tue Dec 25 12:54:19 2012 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> Tue Dec 25 12:54:19 2012 +0000
tree: 8afbebf801304afabf67b3a32da347dc6e1a3159
parent: 4684858624d7ffe82379783e9b678227d5e0b515 [diff] [blame]
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5f7f915..a173712 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp

@@ -9171,8 +9171,30 @@
   if (VT == MVT::v2i64) {
     if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
       return SDValue();
-    if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
-      return SDValue();
+    if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
+      // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
+      // pcmpeqd + 2 shuffles + pand.
+      assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
+
+      // First cast everything to the right type.
+      Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+      Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+      // Do the compare.
+      SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
+
+      // Make sure the lower and upper halves are both all-ones.
+      const int Mask1[] = { 0, 0, 2, 2 };
+      SDValue S1 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask1);
+      const int Mask2[] = { 1, 1, 3, 3 };
+      SDValue S2 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask2);
+      Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, S1, S2);
+
+      if (Invert)
+        Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+      return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+    }
   }
 
   // Since SSE has no unsigned integer comparisons, we need to flip  the sign
commit	382ed78d3fef9f6c582e3cdcfb30f8c6fa3d0d79	[log] [tgz]
author	Benjamin Kramer <benny.kra@googlemail.com>	Tue Dec 25 12:54:19 2012 +0000
committer	Benjamin Kramer <benny.kra@googlemail.com>	Tue Dec 25 12:54:19 2012 +0000
tree	8afbebf801304afabf67b3a32da347dc6e1a3159
parent	4684858624d7ffe82379783e9b678227d5e0b515 [diff] [blame]