[X86][SSE] Add support for lowering shuffles to PACKSS/PACKUS
If the inputs of a truncation shuffle pattern have at least the minimum number of sign/zero bits in their upper halves, then we can safely use PACKSS/PACKUS to perform the shuffle, as the pack saturation can never trigger.
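
For example (a hypothetical reduction, not taken from the test suite), every element of a sign-extended compare result is all-ones or all-zeros, so truncating it is lossless and the concatenating shuffle below can now lower to a single PACKSSWB instead of a mask + PACKUSWB or PSHUFB sequence:

  define <16 x i8> @cmp_trunc(<8 x i16> %a0, <8 x i16> %b0,
                              <8 x i16> %a1, <8 x i16> %b1) {
    ; Each compare element is 0 or -1, i.e. already sign-extended,
    ; so it has far more than the minimum number of sign bits.
    %c0 = icmp sgt <8 x i16> %a0, %b0
    %c1 = icmp sgt <8 x i16> %a1, %b1
    %s0 = sext <8 x i1> %c0 to <8 x i16>
    %s1 = sext <8 x i1> %c1 to <8 x i16>
    %t0 = trunc <8 x i16> %s0 to <8 x i8>
    %t1 = trunc <8 x i16> %s1 to <8 x i8>
    ; Truncate-and-concatenate shuffle: now lowered to packsswb.
    %r = shufflevector <8 x i8> %t0, <8 x i8> %t1,
         <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                     i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    ret <16 x i8> %r
  }

The v16i16 test below shows the effect: the old SSE2 code masked each compare result with a [255 x 8] constant before PACKUSWB (and SSSE3 used PSHUFB + PUNPCKLQDQ), while the new code feeds the compares straight into PACKSSWB.
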
Partial fix for https://bugs.llvm.org/show_bug.cgi?id=34773
Differential Revision: https://reviews.llvm.org/D38472
llvm-svn: 314788
diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
index 43ef63a..ab74290 100644
--- a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
+++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
@@ -149,41 +149,18 @@
 }
 
 define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
-; SSE2-LABEL: v16i16:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    pcmpgtw %xmm3, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm3, %xmm1
-; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
-; SSE2-NEXT:    pand %xmm3, %xmm0
-; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    pcmpgtw %xmm7, %xmm5
-; SSE2-NEXT:    pand %xmm3, %xmm5
-; SSE2-NEXT:    pcmpgtw %xmm6, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm4
-; SSE2-NEXT:    packuswb %xmm5, %xmm4
-; SSE2-NEXT:    pand %xmm0, %xmm4
-; SSE2-NEXT:    pmovmskb %xmm4, %eax
-; SSE2-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: v16i16:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    pcmpgtw %xmm3, %xmm1
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT:    pshufb %xmm3, %xmm1
-; SSSE3-NEXT:    pcmpgtw %xmm2, %xmm0
-; SSSE3-NEXT:    pshufb %xmm3, %xmm0
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT:    pcmpgtw %xmm7, %xmm5
-; SSSE3-NEXT:    pshufb %xmm3, %xmm5
-; SSSE3-NEXT:    pcmpgtw %xmm6, %xmm4
-; SSSE3-NEXT:    pshufb %xmm3, %xmm4
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
-; SSSE3-NEXT:    pand %xmm0, %xmm4
-; SSSE3-NEXT:    pmovmskb %xmm4, %eax
-; SSSE3-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; SSSE3-NEXT:    retq
+; SSE2-SSSE3-LABEL: v16i16:
+; SSE2-SSSE3:       # BB#0:
+; SSE2-SSSE3-NEXT:    pcmpgtw %xmm3, %xmm1
+; SSE2-SSSE3-NEXT:    pcmpgtw %xmm2, %xmm0
+; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT:    pcmpgtw %xmm7, %xmm5
+; SSE2-SSSE3-NEXT:    pcmpgtw %xmm6, %xmm4
+; SSE2-SSSE3-NEXT:    packsswb %xmm5, %xmm4
+; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
+; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
+; SSE2-SSSE3-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; SSE2-SSSE3-NEXT:    retq
 ;
 ; AVX1-LABEL: v16i16:
 ; AVX1:       # BB#0:
@@ -236,42 +213,33 @@
 ; SSE2-LABEL: v8i32:
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    packssdw %xmm0, %xmm1
 ; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    packssdw %xmm0, %xmm0
 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    pcmpgtd %xmm7, %xmm5
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    packssdw %xmm0, %xmm5
 ; SSE2-NEXT:    pcmpgtd %xmm6, %xmm4
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm4[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
-; SSE2-NEXT:    packuswb %xmm2, %xmm2
-; SSE2-NEXT:    pmovmskb %xmm2, %eax
+; SSE2-NEXT:    packssdw %xmm0, %xmm4
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; SSE2-NEXT:    pand %xmm0, %xmm4
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm4
+; SSE2-NEXT:    packuswb %xmm4, %xmm4
+; SSE2-NEXT:    pmovmskb %xmm4, %eax
 ; SSE2-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: v8i32:
 ; SSSE3:       # BB#0:
 ; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSSE3-NEXT:    pshufb %xmm3, %xmm1
+; SSSE3-NEXT:    packssdw %xmm0, %xmm1
 ; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
-; SSSE3-NEXT:    pshufb %xmm3, %xmm0
+; SSSE3-NEXT:    packssdw %xmm0, %xmm0
 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
-; SSSE3-NEXT:    pshufb %xmm3, %xmm5
+; SSSE3-NEXT:    packssdw %xmm0, %xmm5
 ; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm4
-; SSSE3-NEXT:    pshufb %xmm3, %xmm4
+; SSSE3-NEXT:    packssdw %xmm0, %xmm4
 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
 ; SSSE3-NEXT:    pand %xmm0, %xmm4
 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm4 = xmm4[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]