Fix X86's isTruncateFree to not claim that truncate to i1 is free. This fixes Bill's testcase that failed for r48491.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48542 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index acc00fd..f33946c 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2383,6 +2383,31 @@
                          DAG.getConstant(Sum, N1C->getValueType(0)));
     }
   }
+
+  // fold sra (shl X, m), result_size - n
+  // -> (sign_extend (trunc (srl X, result_size - n - m))) for
+  // result_size - n != m. If truncate is free for the target, sext(srl) is
+  // likely to result in better code.
+  if (N0.getOpcode() == ISD::SHL) {
+    // Get the two shift constants: N01C = m (shl), N1C = result_size - n (sra).
+    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N01C && N1C) {
+      // Compute the integer type the value would be truncated to (VT's width
+      // minus the sra amount) and the residual shift amount (sra - shl).
+      unsigned VTValSize = MVT::getSizeInBits(VT);
+      MVT::ValueType TruncVT = MVT::getIntegerType(VTValSize - N1C->getValue());
+      unsigned ShiftAmt = N1C->getValue() - N01C->getValue();
+
+      // If the shift wouldn't be a noop, the truncated type is an actual type,
+      // and the truncate is free, then proceed with the transform.
+      if (ShiftAmt != 0 && TLI.isTruncateFree(VT, TruncVT)) {
+        SDOperand Amt = DAG.getConstant(ShiftAmt, TLI.getShiftAmountTy());
+        SDOperand Shift = DAG.getNode(ISD::SRL, VT, N0.getOperand(0), Amt);
+        SDOperand Trunc = DAG.getNode(ISD::TRUNCATE, TruncVT, Shift);
+        return DAG.getNode(ISD::SIGN_EXTEND, N->getValueType(0), Trunc);
+      }
+    }
+  }
   
   // Simplify, based on bits shifted out of the LHS. 
   if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 1588b49..ceda932 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1647,47 +1647,3 @@
 if it commuted the addl in LBB1_1.
 
 //===---------------------------------------------------------------------===//
-
-These two functions perform identical operations:
-
-define i32 @test(i32 %f12) {
-	%tmp7.25 = lshr i32 %f12, 16		
-	%tmp7.26 = trunc i32 %tmp7.25 to i8
-	%tmp78.2 = sext i8 %tmp7.26 to i32
-	ret i32 %tmp78.2
-}
-
-define i32 @test2(i32 %f12) {
-	%f11 = shl i32 %f12, 8
-	%tmp7.25 = ashr i32 %f11, 24
-	ret i32 %tmp7.25
-}
-
-but the first compiles into significantly better code on x86-32:
-
-_test:
-	movsbl	6(%esp), %eax
-	ret
-_test2:
-	movl	4(%esp), %eax
-	shll	$8, %eax
-	sarl	$24, %eax
-	ret
-        
-and on x86-64:
-
-_test:
-	shrl	$16, %edi
-	movsbl	%dil, %eax
-	ret
-_test2:
-	shll	$8, %edi
-	movl	%edi, %eax
-	sarl	$24, %eax
-	ret
-
-I would like instcombine to canonicalize the first into the second (since it is
-shorter and doesn't involve type width changes) but the x86 backend needs to do
-the right thing with the later sequence first.
-
-//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5a05aba..93fb802 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5662,7 +5662,7 @@
     return false;
   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
-  if (NumBits1 <= NumBits2)
+  if (NumBits1 <= NumBits2 || NumBits2 < 8)
     return false;
   return Subtarget->is64Bit() || NumBits1 < 64;
 }
@@ -5673,7 +5673,7 @@
     return false;
   unsigned NumBits1 = MVT::getSizeInBits(VT1);
   unsigned NumBits2 = MVT::getSizeInBits(VT2);
-  if (NumBits1 <= NumBits2)
+  if (NumBits1 <= NumBits2 || NumBits2 < 8)
     return false;
   return Subtarget->is64Bit() || NumBits1 < 64;
 }
diff --git a/test/CodeGen/X86/field-extract-use-trunc.ll b/test/CodeGen/X86/field-extract-use-trunc.ll
index bb46bc5..c4f9587 100644
--- a/test/CodeGen/X86/field-extract-use-trunc.ll
+++ b/test/CodeGen/X86/field-extract-use-trunc.ll
@@ -1,6 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=x86 | grep sar | count 1
 ; RUN: llvm-as < %s | llc -march=x86-64 | not grep sar
-; XFAIL: *
 
 define i32 @test(i32 %f12) {
 	%tmp7.25 = lshr i32 %f12, 16