; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Verify that instcombine is able to fold identity shuffles.

define <16 x i8> @identity_test(<16 x i8> %InVec) {
; CHECK-LABEL: @identity_test(
; CHECK-NEXT: ret <16 x i8> %InVec
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @identity_test_avx2(
; CHECK-NEXT: ret <32 x i8> %InVec
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

; Verify that instcombine is able to fold byte shuffles with zero masks.

define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector(
; CHECK-NEXT: ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_avx2(
; CHECK-NEXT: ret <32 x i8> zeroinitializer
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
; with a shuffle mask of all zeroes.

define <16 x i8> @splat_test(<16 x i8> %InVec) {
; CHECK-LABEL: @splat_test(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

; In the test case below, elements in the low 128-bit lane of the result
; vector are equal to the lower byte of %InVec (shuffle index 0).
; Elements in the high 128-bit lane of the result vector are equal to
; the lower byte in the high 128-bit lane of %InVec (shuffle index 16).

define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @splat_test_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> zeroinitializer)
  ret <32 x i8> %1
}

; Each of the byte shuffles in the following tests is equivalent to a blend between
; vector %InVec and a vector of all zeroes.

define <16 x i8> @blend1(<16 x i8> %InVec) {
; CHECK-LABEL: @blend1(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend2(<16 x i8> %InVec) {
; CHECK-LABEL: @blend2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend3(<16 x i8> %InVec) {
; CHECK-LABEL: @blend3(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend4(<16 x i8> %InVec) {
; CHECK-LABEL: @blend4(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend5(<16 x i8> %InVec) {
; CHECK-LABEL: @blend5(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <16 x i8> @blend6(<16 x i8> %InVec) {
; CHECK-LABEL: @blend6(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @blend1_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend1_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 48, i32 17, i32 48, i32 19, i32 48, i32 21, i32 48, i32 23, i32 48, i32 25, i32 48, i32 27, i32 48, i32 29, i32 48, i32 31>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend2_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend2_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 48, i32 48, i32 18, i32 19, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 26, i32 27, i32 48, i32 48, i32 30, i32 31>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend3_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend3_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend4_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend4_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend5_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend5_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

define <32 x i8> @blend6_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend6_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

; movq idiom.
define <16 x i8> @movq_idiom(<16 x i8> %InVec) {
; CHECK-LABEL: @movq_idiom(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @movq_idiom_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

; Vector permutations using byte shuffles.

define <16 x i8> @permute1(<16 x i8> %InVec) {
; CHECK-LABEL: @permute1(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @permute2(<16 x i8> %InVec) {
; CHECK-LABEL: @permute2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

define <32 x i8> @permute1_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @permute1_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @permute2_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <32 x i8> %1
}

; Test that instcombine correctly folds a pshufb with values that
; are not -128 and that are not encoded in four bits.

define <16 x i8> @identity_test2_2(<16 x i8> %InVec) {
; CHECK-LABEL: @identity_test2_2(
; CHECK-NEXT: ret <16 x i8> %InVec
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
  ret <16 x i8> %1
}

define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) {
; CHECK-LABEL: @identity_test_avx2_2(
; CHECK-NEXT: ret <32 x i8> %InVec
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 16, i8 33, i8 66, i8 19, i8 36, i8 69, i8 22, i8 39, i8 72, i8 25, i8 42, i8 75, i8 28, i8 45, i8 78, i8 31, i8 48, i8 81, i8 34, i8 51, i8 84, i8 37, i8 54, i8 87, i8 40, i8 57, i8 90, i8 43, i8 60, i8 93, i8 46, i8 63>)
  ret <32 x i8> %1
}

define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_2(
; CHECK-NEXT: ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -125, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15>)
  ret <16 x i8> %1
}

define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_avx2_2(
; CHECK-NEXT: ret <32 x i8> zeroinitializer
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -127, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15, i8 -126, i8 -2, i8 -52, i8 -31, i8 -5, i8 -8, i8 -34, i8 -67, i8 -100, i8 -119, i8 -101, i8 -23, i8 -16, i8 -2, i8 -12, i8 -16>)
  ret <32 x i8> %1
}

define <16 x i8> @permute3(<16 x i8> %InVec) {
; CHECK-LABEL: @permute3(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 48, i8 17, i8 34, i8 51, i8 20, i8 37, i8 54, i8 23, i8 16, i8 49, i8 66, i8 19, i8 52, i8 69, i8 22, i8 55>)
  ret <16 x i8> %1
}

define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @permute3_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 92, i8 93, i8 94, i8 95, i8 108, i8 109, i8 110, i8 111>)
  ret <32 x i8> %1
}

; FIXME: Verify that instcombine is able to fold constant byte shuffles with undef mask elements.

define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_with_undef_elts(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 16, i32 undef, i32 16, i32 1, i32 16, i32 undef, i32 16, i32 2, i32 16, i32 undef, i32 16, i32 3, i32 16, i32 undef, i32 16>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_with_undef_elts_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 32, i32 undef, i32 32, i32 1, i32 32, i32 undef, i32 32, i32 2, i32 32, i32 undef, i32 32, i32 3, i32 32, i32 undef, i32 32, i32 16, i32 48, i32 undef, i32 48, i32 17, i32 48, i32 undef, i32 48, i32 18, i32 48, i32 undef, i32 48, i32 19, i32 48, i32 undef, i32 48>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
  ret <32 x i8> %1
}

define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_with_allundef_elts(
; CHECK-NEXT: ret <16 x i8> undef
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef)
  ret <16 x i8> %1
}

define <32 x i8> @fold_with_allundef_elts_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_with_allundef_elts_avx2(
; CHECK-NEXT: ret <32 x i8> undef
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef)
  ret <32 x i8> %1
}


declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)