; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2-FAST

; fold (srl 0, x) -> 0
define <4 x i32> @combine_vec_lshr_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_zero:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_zero:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> zeroinitializer, %x
  ret <4 x i32> %1
}

; fold (srl x, c >= size(x)) -> undef
define <4 x i32> @combine_vec_lshr_outofrange0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_outofrange0:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_outofrange0:
; AVX: # %bb.0:
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_lshr_outofrange1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_outofrange1:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_outofrange1:
; AVX: # %bb.0:
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 33, i32 34, i32 35, i32 36>
  ret <4 x i32> %1
}
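; Note (illustrative comment, not checked by FileCheck): an i32 lshr by 33 or more shifts
; out every defined bit, so, as the fold comment says, the result is undef and llc may
; return the incoming %xmm0 unchanged, which is why both out-of-range tests above collapse
; to a bare retq.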

; fold (srl x, 0) -> x
define <4 x i32> @combine_vec_lshr_by_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_by_zero:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_by_zero:
; AVX: # %bb.0:
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, zeroinitializer
  ret <4 x i32> %1
}

; if (srl x, c) is known to be zero, return 0
define <4 x i32> @combine_vec_lshr_known_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_known_zero0:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_known_zero0:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}
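; Note (worked example, not checked by FileCheck): (x & 15) can only have bits 0-3 set, so
; a further lshr by 4 is zero in every lane (e.g. (13 & 15) >> 4 == 0), and the whole
; function is materialized as a single xorps/vxorps.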

define <4 x i32> @combine_vec_lshr_known_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_known_zero1:
; SSE: # %bb.0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $11, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $9, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $10, %xmm1
; SSE-NEXT: psrld $8, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_known_zero1:
; AVX: # %bb.0:
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <4 x i32> %1, <i32 8, i32 9, i32 10, i32 11>
  ret <4 x i32> %2
}
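; Note (illustrative comment, not checked by FileCheck): each lane here is also provably
; zero ((x & 15) >> 8..11), but with non-uniform shift amounts the checks above show the
; mask and shifts still being emitted instead of a plain zeroing idiom.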

; fold (srl (srl x, c1), c2) -> (srl x, (add c1, c2))
define <4 x i32> @combine_vec_lshr_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lshr0:
; SSE: # %bb.0:
; SSE-NEXT: psrld $6, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lshr0:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $6, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %2 = lshr <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}
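; Note (worked example, not checked by FileCheck): (x >> 2) >> 4 equals x >> 6 while the
; summed amount stays below 32, e.g. 0xffffffff >> 2 >> 4 == 0x3ffffff == 0xffffffff >> 6,
; hence the single shift-by-6 above.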

define <4 x i32> @combine_vec_lshr_lshr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lshr1:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $10, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $6, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $8, %xmm1
; SSE-NEXT: psrld $4, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lshr1:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 0, i32 1, i32 2, i32 3>
  %2 = lshr <4 x i32> %1, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %2
}

; fold (srl (srl x, c1), c2) -> 0
define <4 x i32> @combine_vec_lshr_lshr_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lshr_zero0:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lshr_zero0:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 20, i32 20, i32 20, i32 20>
  ret <4 x i32> %2
}
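; Note (worked example, not checked by FileCheck): the combined amount is 16 + 20 = 36,
; which is >= 32, so every lane is known to be zero and the shift pair folds to a zeroing
; xorps/vxorps.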

define <4 x i32> @combine_vec_lshr_lshr_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lshr_zero1:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lshr_zero1:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 17, i32 18, i32 19, i32 20>
  %2 = lshr <4 x i32> %1, <i32 25, i32 26, i32 27, i32 28>
  ret <4 x i32> %2
}

; fold (srl (trunc (srl x, c1)), c2) -> (trunc (srl x, (add c1, c2)))
define <4 x i32> @combine_vec_lshr_trunc_lshr0(<4 x i64> %x) {
; SSE-LABEL: combine_vec_lshr_trunc_lshr0:
; SSE: # %bb.0:
; SSE-NEXT: psrlq $48, %xmm1
; SSE-NEXT: psrlq $48, %xmm0
; SSE-NEXT: packusdw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_trunc_lshr0:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlq $48, %ymm0, %ymm0
; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = lshr <4 x i64> %x, <i64 32, i64 32, i64 32, i64 32>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %3
}
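; Note (illustrative comment, not checked by FileCheck): shifting each i64 lane by 32,
; truncating to i32 and shifting by another 16 simply extracts bits 48-63, so the codegen
; shifts the wide lanes by 48 once and then packs the (now 16-bit) results back down.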

define <4 x i32> @combine_vec_lshr_trunc_lshr1(<4 x i64> %x) {
; SSE-LABEL: combine_vec_lshr_trunc_lshr1:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrlq $35, %xmm2
; SSE-NEXT: psrlq $34, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrlq $33, %xmm2
; SSE-NEXT: psrlq $32, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: psrld $19, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: psrld $17, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: psrld $18, %xmm1
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: retq
;
; AVX2-SLOW-LABEL: combine_vec_lshr_trunc_lshr1:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: combine_vec_lshr_trunc_lshr1:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
  %1 = lshr <4 x i64> %x, <i64 32, i64 33, i64 34, i64 35>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %2, <i32 16, i32 17, i32 18, i32 19>
  ret <4 x i32> %3
}

; fold (srl (trunc (srl x, c1)), c2) -> 0
define <4 x i32> @combine_vec_lshr_trunc_lshr_zero0(<4 x i64> %x) {
; SSE-LABEL: combine_vec_lshr_trunc_lshr_zero0:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_trunc_lshr_zero0:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i64> %x, <i64 48, i64 48, i64 48, i64 48>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %3
}
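; Note (worked example, not checked by FileCheck): after an i64 lshr by 48 only bits 0-15
; can remain, and the follow-up i32 lshr by 24 discards those as well, so the result is
; known zero and the body reduces to xorps/vxorps.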

define <4 x i32> @combine_vec_lshr_trunc_lshr_zero1(<4 x i64> %x) {
; SSE-LABEL: combine_vec_lshr_trunc_lshr_zero1:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrlq $51, %xmm2
; SSE-NEXT: psrlq $50, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrlq $49, %xmm2
; SSE-NEXT: psrlq $48, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: packusdw %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $27, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $25, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $26, %xmm1
; SSE-NEXT: psrld $24, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_trunc_lshr_zero1:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = lshr <4 x i64> %x, <i64 48, i64 49, i64 50, i64 51>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %2, <i32 24, i32 25, i32 26, i32 27>
  ret <4 x i32> %3
}

; fold (srl (shl x, c), c) -> (and x, cst2)
define <4 x i32> @combine_vec_lshr_shl_mask0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_shl_mask0:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_shl_mask0:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1073741823,1073741823,1073741823,1073741823]
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %2 = lshr <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}
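; Note (worked example, not checked by FileCheck): (x << 2) >> 2 clears the top two bits
; and keeps the rest, i.e. x & 0x3fffffff, and 0x3fffffff == 1073741823 is the broadcast
; constant visible in the AVX checks above.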

define <4 x i32> @combine_vec_lshr_shl_mask1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_shl_mask1:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_shl_mask1:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 2, i32 3, i32 4, i32 5>
  %2 = lshr <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
  ret <4 x i32> %2
}

; fold (srl (sra X, Y), 31) -> (srl X, 31)
define <4 x i32> @combine_vec_lshr_ashr_sign(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_lshr_ashr_sign:
; SSE: # %bb.0:
; SSE-NEXT: psrld $31, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_ashr_sign:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr <4 x i32> %x, %y
  %2 = lshr <4 x i32> %1, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %2
}
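; Note (illustrative comment, not checked by FileCheck): an arithmetic shift fills with
; copies of the sign bit, so bit 31 of (ashr x, y) matches bit 31 of x for any in-range y,
; and extracting it needs only a single logical shift of the original %x by 31.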

; fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
define <4 x i32> @combine_vec_lshr_lzcnt_bit0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lzcnt_bit0:
; SSE: # %bb.0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: psrld $4, %xmm0
; SSE-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lzcnt_bit0:
; AVX: # %bb.0:
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $4, %xmm0, %xmm0
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %1, i1 0)
  %3 = lshr <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %3
}
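; Note (illustrative comment, not checked by FileCheck): after the mask each lane is 0 or
; 16, ctlz gives 32 or 27 respectively, and shifting that by 5 leaves 1 only for the zero
; lane; the code above gets the same result by isolating bit 4, shifting it down and
; inverting it with an xor of 1.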

define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lzcnt_bit1:
; SSE: # %bb.0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: pshufb %xmm1, %xmm4
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlw $4, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pshufb %xmm1, %xmm3
; SSE-NEXT: pcmpeqb %xmm2, %xmm1
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: paddb %xmm3, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: pcmpeqb %xmm2, %xmm3
; SSE-NEXT: psrlw $8, %xmm3
; SSE-NEXT: pand %xmm1, %xmm3
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: paddw %xmm3, %xmm1
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: psrld $5, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lzcnt_bit1:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm4
; AVX-NEXT: vpand %xmm1, %xmm4, %xmm1
; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm5
; AVX-NEXT: vpand %xmm5, %xmm2, %xmm2
; AVX-NEXT: vpshufb %xmm1, %xmm3, %xmm1
; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsrld $16, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsrld $5, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 4, i32 32, i32 64, i32 128>
  %2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %1, i1 0)
  %3 = lshr <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)

; fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
define <4 x i32> @combine_vec_lshr_trunc_and(<4 x i32> %x, <4 x i64> %y) {
; SSE-LABEL: combine_vec_lshr_trunc_and:
; SSE: # %bb.0:
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: psrldq {{.*#+}} xmm2 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: psrld %xmm2, %xmm3
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: psrlq $32, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm4
; SSE-NEXT: psrld %xmm2, %xmm4
; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld %xmm1, %xmm2
; SSE-NEXT: psrld %xmm3, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3],xmm0[4,5],xmm4[6,7]
; SSE-NEXT: retq
;
; AVX2-SLOW-LABEL: combine_vec_lshr_trunc_and:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-SLOW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-SLOW-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: combine_vec_lshr_trunc_and:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-FAST-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
  %1 = and <4 x i64> %y, <i64 15, i64 255, i64 4095, i64 65535>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %x, %2
  ret <4 x i32> %3
}
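; Note (illustrative comment, not checked by FileCheck): the mask <15, 255, 4095, 65535>
; keeps every shift amount comfortably inside 32 bits, so truncating %y to <4 x i32> before
; the and/shift is safe, letting the AVX2 runs narrow first and finish with one vpsrlvd.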