; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512F
;
; Combine tests involving SSE3/SSSE3 target shuffles (MOVDDUP, MOVSHDUP, MOVSLDUP, PSHUFB)

declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)

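; A PSHUFB mask byte with its most significant bit set (e.g. 128) zeroes that result
; lane. The first shuffle below zeroes lane 0, and every later shuffle only reads lanes
; that are already zero, so the whole chain should fold to an all-zeros vector.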
define <16 x i8> @combine_vpshufb_zero(<16 x i8> %a0) {
; SSE-LABEL: combine_vpshufb_zero:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vpshufb_zero:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
  %res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
  %res2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res1, <16 x i8> <i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <16 x i8> %res2
}

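; The <0,1,0,1> shuffle is the MOVDDUP pattern (duplicate the low 64 bits); it should be
; folded into the preceding PSHUFB by remapping the shuffle mask.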
define <4 x float> @combine_pshufb_movddup(<4 x float> %a0) {
; SSE-LABEL: combine_pshufb_movddup:
; SSE:       # BB#0:
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufb_movddup:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7]
; AVX-NEXT:    retq
  %1 = bitcast <4 x float> %a0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %4
}

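; The <1,1,3,3> shuffle is the MOVSHDUP pattern (duplicate the odd elements); it should
; fold into the preceding PSHUFB.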
define <4 x float> @combine_pshufb_movshdup(<4 x float> %a0) {
; SSE-LABEL: combine_pshufb_movshdup:
; SSE:       # BB#0:
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufb_movshdup:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3]
; AVX-NEXT:    retq
  %1 = bitcast <4 x float> %a0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %4
}

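; The <0,0,2,2> shuffle is the MOVSLDUP pattern (duplicate the even elements); it should
; fold into the preceding PSHUFB.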
define <4 x float> @combine_pshufb_movsldup(<4 x float> %a0) {
; SSE-LABEL: combine_pshufb_movsldup:
; SSE:       # BB#0:
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufb_movsldup:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1]
; AVX-NEXT:    retq
  %1 = bitcast <4 x float> %a0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %4
}

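; The PSHUFB mask only reads byte 0 of the interleaved vector (and zeroes the rest),
; and that byte comes from %a0, so the unpack can be dropped and the PSHUFB applied to
; %a0 directly.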
define <16 x i8> @combine_unpckl_arg0_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_unpckl_arg0_pshufb:
; SSE:       # BB#0:
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_unpckl_arg0_pshufb:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
; AVX-NEXT:    retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %2
}

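; Here the mask only reads byte 1 of the interleaved vector, which comes from %a1, so
; the combined PSHUFB should operate on %a1 (xmm1) instead.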
define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_unpckl_arg1_pshufb:
; SSE:       # BB#0:
; SSE-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_unpckl_arg1_pshufb:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero
; AVX-NEXT:    retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %2
}