Xform bitconvert(build_pair(load a, load b)) into a single load when the two load locations are consecutive in memory, i.e. b starts exactly where a ends.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51008 91177308-0d34-0410-b5e6-96231b3b80d8
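
For illustration only (this is standalone C++, not the SelectionDAG combine itself;
the function names are made up): the combine exploits the fact that building an i64
from two adjacent i32 loads and then bitconverting it to f64 is equivalent, on a
little-endian target like x86, to one f64 load from the lower address.

#include <cstdint>
#include <cstring>
#include <cstdio>

// bitconvert(build_pair(load p[0], load p[1])): two 32-bit loads glued
// into a 64-bit value, then reinterpreted as a double.
double pairThenBitconvert(const uint32_t *p) {
  uint64_t pair = (uint64_t)p[0] | ((uint64_t)p[1] << 32); // build_pair
  double d;
  std::memcpy(&d, &pair, sizeof d);                        // bitconvert
  return d;
}

// What the combine produces instead: one f64-sized load from the low address.
double singleLoad(const uint32_t *p) {
  double d;
  std::memcpy(&d, p, sizeof d);
  return d;
}

int main() {
  double x = 3.141592653589793;
  uint32_t buf[2];
  std::memcpy(buf, &x, sizeof x);
  std::printf("%f %f\n", pairThenBitconvert(buf), singleLoad(buf)); // same value
}
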
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 1a5d904..34b949a 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -428,60 +428,6 @@
 
 //===---------------------------------------------------------------------===//
 
-Consider (PR2108):
-
-#include <xmmintrin.h>
-__m128i doload64(unsigned long long x) { return _mm_loadl_epi64(&x);}
-__m128i doload64_2(unsigned long long *x) { return _mm_loadl_epi64(x);}
-
-These are very similar routines, but we generate significantly worse code for
-the first one on x86-32:
-
-_doload64:
-	subl	$12, %esp
-	movl	20(%esp), %eax
-	movl	%eax, 4(%esp)
-	movl	16(%esp), %eax
-	movl	%eax, (%esp)
-	movsd	(%esp), %xmm0
-	addl	$12, %esp
-	ret
-_doload64_2:
-	movl	4(%esp), %eax
-	movsd	(%eax), %xmm0
-	ret
-
-The problem is that the argument lowering logic splits the i64 argument into
-2x i32 loads early, the f64 insert doesn't match.  Here's a reduced testcase:
-
-define fastcc double @doload64(i64 %x) nounwind  {
-entry:
-	%tmp717 = bitcast i64 %x to double		; <double> [#uses=1]
-	ret double %tmp717
-}
-
-compiles to:
-
-_doload64:
-	subl	$12, %esp
-	movl	20(%esp), %eax
-	movl	%eax, 4(%esp)
-	movl	16(%esp), %eax
-	movl	%eax, (%esp)
-	movsd	(%esp), %xmm0
-	addl	$12, %esp
-	ret
-
-instead of movsd from the stack.  This is actually not too bad to implement. The
-best way to do this is to implement a dag combine that turns 
-bitconvert(build_pair(load a, load b)) into one load of the right type.  The
-only trick to this is writing the predicate that determines that a/b are at the
-right offset from each other.  For the enterprising hacker, InferAlignment is a
-helpful place to start poking if interested.
-
-
-//===---------------------------------------------------------------------===//
-
 __m128d test1( __m128d A, __m128d B) {
   return _mm_shuffle_pd(A, B, 0x3);
 }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5d50e36..806b626 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6285,13 +6285,7 @@
                      LD->getAlignment());
 }
 
-static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
-  SDOperand Elt = N->getOperand(i);
-  if (Elt.getOpcode() != ISD::MERGE_VALUES)
-    return Elt.Val;
-  return Elt.getOperand(Elt.ResNo).Val;
-}
-
+/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
 static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
                                            const X86Subtarget *Subtarget,
                                            const TargetLowering &TLI) {
@@ -6312,25 +6306,17 @@
     return SDOperand();
 
   // Value must be a load.
-  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   SDNode *Base = N->getOperand(0).Val;
   if (!isa<LoadSDNode>(Base)) {
-    if (Base->getOpcode() == ISD::BIT_CONVERT)
-      Base = Base->getOperand(0).Val;
-    if (Base->getOpcode() != ISD::BUILD_PAIR)
+    if (Base->getOpcode() != ISD::BIT_CONVERT)
       return SDOperand();
-    SDNode *Pair = Base;
-    Base = getBuildPairElt(Pair, 0);
-    if (!ISD::isNON_EXTLoad(Base))
-      return SDOperand();
-    SDNode *NextLD = getBuildPairElt(Pair, 1);
-    if (!ISD::isNON_EXTLoad(NextLD) ||
-        !TLI.isConsecutiveLoad(NextLD, Base, 4/*32 bits*/, 1, MFI))
+    Base = Base->getOperand(0).Val;
+    if (!isa<LoadSDNode>(Base))
       return SDOperand();
   }
-  LoadSDNode *LD = cast<LoadSDNode>(Base);
 
   // Transform it into VZEXT_LOAD addr.
+  LoadSDNode *LD = cast<LoadSDNode>(Base);
   return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
 }