; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512F
;
; Combine tests involving SSE3/SSSE3 target shuffles (MOVDDUP, MOVSHDUP, MOVSLDUP, PSHUFB)

; Variable-mask byte shuffle intrinsic used by all tests below.
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)

; A PSHUFB followed by a MOVDDUP-pattern shuffle (<0,1,0,1>) should fold into a
; single PSHUFB with a combined byte mask.
define <4 x float> @combine_pshufb_movddup(<4 x float> %a0) {
; SSE-LABEL: combine_pshufb_movddup:
; SSE:       # BB#0:
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufb_movddup:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7]
; AVX-NEXT:    retq
  %1 = bitcast <4 x float> %a0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %4
}

; A PSHUFB followed by a MOVSHDUP-pattern shuffle (<1,1,3,3>) should fold into a
; single PSHUFB with a combined byte mask.
define <4 x float> @combine_pshufb_movshdup(<4 x float> %a0) {
; SSE-LABEL: combine_pshufb_movshdup:
; SSE:       # BB#0:
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufb_movshdup:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3]
; AVX-NEXT:    retq
  %1 = bitcast <4 x float> %a0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %4
}

; A PSHUFB followed by a MOVSLDUP-pattern shuffle (<0,0,2,2>) should fold into a
; single PSHUFB with a combined byte mask.
define <4 x float> @combine_pshufb_movsldup(<4 x float> %a0) {
; SSE-LABEL: combine_pshufb_movsldup:
; SSE:       # BB#0:
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufb_movsldup:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1]
; AVX-NEXT:    retq
  %1 = bitcast <4 x float> %a0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %4
}

; An interleave (PUNPCKLBW) feeding a PSHUFB; the checks pin the current
; two-instruction codegen for this pattern.
define <16 x i8> @combine_unpckl_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_unpckl_pshufb:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_unpckl_pshufb:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
; AVX-NEXT:    retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %2
}