; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512F
;
; Combine tests involving SSE3/SSSE3 target shuffles (MOVDDUP, MOVSHDUP, MOVSLDUP, PSHUFB)

; 128-bit PSHUFB intrinsic used by every test in this file. The first operand
; is the byte vector being shuffled, the second is the per-byte control: a
; control byte with its top bit set (e.g. i8 128 or i8 -1) produces a zero
; byte, otherwise its low four bits select a byte of the first operand.
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)

; Chains three PSHUFB calls with constant controls. Tracing the controls shows
; the result does not depend on %a0:
;   %res0 = [zero, a0[0] x 15]
;   %res1 = [res0[0], zero, res0[0] x 14]   -> every byte is zero
;   %res2 = [res1[0], res1[1], zero x 14]   -> every byte is zero
; The assertions below are autogenerated and record the instruction sequence
; llc emitted when they were last regenerated (two shuffles plus a control
; load), not a hand-written ideal.
define <16 x i8> @combine_vpshufb_zero(<16 x i8> %a0) {
; SSE-LABEL: combine_vpshufb_zero:
; SSE: # BB#0:
; SSE-NEXT: movl $128, %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: pshufb %xmm1, %xmm0
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vpshufb_zero:
; AVX: # BB#0:
; AVX-NEXT: movl $128, %eax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
  %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
  %res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
  %res2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res1, <16 x i8> <i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <16 x i8> %res2
}

; PSHUFB followed by a MOVDDUP-style shuffle (float lanes <0,1,0,1>, i.e. the
; low 64 bits duplicated into the high 64 bits).
; After the byte shuffle the float lanes hold source bytes
; [5,5,5,5] [7,7,7,7] [1,1,1,1] [3,3,3,3]; duplicating lanes 0-1 gives
; [5 x4, 7 x4, 5 x4, 7 x4], which the assertions expect as one byte shuffle.
define <4 x float> @combine_pshufb_movddup(<4 x float> %a0) {
; SSE-LABEL: combine_pshufb_movddup:
; SSE: # BB#0:
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufb_movddup:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7]
; AVX-NEXT: retq
  %1 = bitcast <4 x float> %a0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %4
}

; PSHUFB followed by a MOVSHDUP-style shuffle (float lanes <1,1,3,3>, i.e. each
; odd lane duplicated into the even lane below it).
; After the byte shuffle the float lanes hold source bytes
; [5,5,5,5] [7,7,7,7] [1,1,1,1] [3,3,3,3]; taking lanes 1,1,3,3 gives
; [7 x8, 3 x8], which the assertions expect as one byte shuffle.
define <4 x float> @combine_pshufb_movshdup(<4 x float> %a0) {
; SSE-LABEL: combine_pshufb_movshdup:
; SSE: # BB#0:
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufb_movshdup:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3]
; AVX-NEXT: retq
  %1 = bitcast <4 x float> %a0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %4
}

; PSHUFB followed by a MOVSLDUP-style shuffle (float lanes <0,0,2,2>, i.e. each
; even lane duplicated into the odd lane above it).
; After the byte shuffle the float lanes hold source bytes
; [5,5,5,5] [7,7,7,7] [1,1,1,1] [3,3,3,3]; taking lanes 0,0,2,2 gives
; [5 x8, 1 x8], which the assertions expect as one byte shuffle.
define <4 x float> @combine_pshufb_movsldup(<4 x float> %a0) {
; SSE-LABEL: combine_pshufb_movsldup:
; SSE: # BB#0:
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufb_movsldup:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1]
; AVX-NEXT: retq
  %1 = bitcast <4 x float> %a0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %4
}

; Byte interleave of the low halves of %a0 and %a1 (the PUNPCKLBW pattern)
; followed by a PSHUFB that only reads interleaved byte 0, which came from
; %a0[0]; every other result byte is zeroed by an i8 -1 control.
; Because only %a0 is observed, the assertions expect the interleave to vanish
; and a single byte shuffle to operate directly on the first argument (xmm0).
define <16 x i8> @combine_unpckl_arg0_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_unpckl_arg0_pshufb:
; SSE: # BB#0:
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: combine_unpckl_arg0_pshufb:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
; AVX-NEXT: retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %2
}

; Byte interleave of the low halves of %a0 and %a1 (the PUNPCKLBW pattern)
; followed by a PSHUFB that only reads interleaved byte 1, which came from
; %a1[0]; every other result byte is zeroed by an i8 -1 control.
; Because only %a1 is observed, the assertions expect the interleave to vanish
; and a single byte shuffle to operate directly on the second argument (xmm1).
; The non-VEX form then needs a register copy to return the value in xmm0.
define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_unpckl_arg1_pshufb:
; SSE: # BB#0:
; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_unpckl_arg1_pshufb:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero
; AVX-NEXT: retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %2
}