; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

; fold (srl 0, x) -> 0
define <4 x i32> @combine_vec_lshr_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_zero:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_zero:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> zeroinitializer, %x
  ret <4 x i32> %1
}

; fold (srl x, c >= size(x)) -> undef
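; (a shift amount of 33 or more on i32 lanes is at least the bit width, so the result is undefined and any lowering is acceptable)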
define <4 x i32> @combine_vec_lshr_outofrange0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_outofrange0:
; SSE: # BB#0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_outofrange0:
; AVX: # BB#0:
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_lshr_outofrange1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_outofrange1:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_outofrange1:
; AVX: # BB#0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 33, i32 34, i32 35, i32 36>
  ret <4 x i32> %1
}

; fold (srl x, 0) -> x
define <4 x i32> @combine_vec_lshr_by_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_by_zero:
; SSE: # BB#0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_by_zero:
; AVX: # BB#0:
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, zeroinitializer
  ret <4 x i32> %1
}

; if (srl x, c) is known to be zero, return 0
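; (the mask below leaves at most the low 4 bits set, so shifting the masked value right by 4 or more is provably zero)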
define <4 x i32> @combine_vec_lshr_known_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_known_zero0:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_known_zero0:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_lshr_known_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_known_zero1:
; SSE: # BB#0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $11, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $9, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $10, %xmm1
; SSE-NEXT: psrld $8, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_known_zero1:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <4 x i32> %1, <i32 8, i32 9, i32 10, i32 11>
  ret <4 x i32> %2
}

; fold (srl (srl x, c1), c2) -> (srl x, (add c1, c2))
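; (e.g. in the splat case below the shifts by 2 and 4 combine into a single shift by 6)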
define <4 x i32> @combine_vec_lshr_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lshr0:
; SSE: # BB#0:
; SSE-NEXT: psrld $6, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lshr0:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $6, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %2 = lshr <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_lshr_lshr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lshr1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $2, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: psrld $3, %xmm0
; SSE-NEXT: psrld $1, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrld $7, %xmm0
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrld $5, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrld $6, %xmm0
; SSE-NEXT: psrld $4, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lshr1:
; AVX: # BB#0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 0, i32 1, i32 2, i32 3>
  %2 = lshr <4 x i32> %1, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %2
}

; fold (srl (srl x, c1), c2) -> 0
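; (the combined shift amount 16 + 20 is >= 32, so every lane is known zero)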
define <4 x i32> @combine_vec_lshr_lshr_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lshr_zero0:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lshr_zero0:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 20, i32 20, i32 20, i32 20>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_lshr_lshr_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lshr_zero1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $20, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $18, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $19, %xmm1
; SSE-NEXT: psrld $17, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $28, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $26, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $27, %xmm1
; SSE-NEXT: psrld $25, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lshr_zero1:
; AVX: # BB#0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 17, i32 18, i32 19, i32 20>
  %2 = lshr <4 x i32> %1, <i32 25, i32 26, i32 27, i32 28>
  ret <4 x i32> %2
}

; fold (srl (trunc (srl x, c1)), c2) -> (trunc (srl x, (add c1, c2)))
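; (in the splat case below 32 + 16 = 48, so a single i64 shift by 48 is done before the truncate)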
define <4 x i32> @combine_vec_lshr_trunc_lshr0(<4 x i64> %x) {
; SSE-LABEL: combine_vec_lshr_trunc_lshr0:
; SSE: # BB#0:
; SSE-NEXT: psrlq $48, %xmm1
; SSE-NEXT: psrlq $48, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_trunc_lshr0:
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $48, %ymm0, %ymm0
; AVX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = lshr <4 x i64> %x, <i64 32, i64 32, i64 32, i64 32>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %3
}

define <4 x i32> @combine_vec_lshr_trunc_lshr1(<4 x i64> %x) {
; SSE-LABEL: combine_vec_lshr_trunc_lshr1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrlq $35, %xmm2
; SSE-NEXT: psrlq $34, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrlq $33, %xmm2
; SSE-NEXT: psrlq $32, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: psrld $19, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: psrld $17, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: psrld $18, %xmm1
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_trunc_lshr1:
; AVX: # BB#0:
; AVX-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = lshr <4 x i64> %x, <i64 32, i64 33, i64 34, i64 35>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %2, <i32 16, i32 17, i32 18, i32 19>
  ret <4 x i32> %3
}

; fold (srl (trunc (srl x, c1)), c2) -> 0
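; (shifting the i64 lanes right by 48 or more leaves at most 16 bits, so a further shift by 24 or more on the truncated i32 lanes is known zero)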
define <4 x i32> @combine_vec_lshr_trunc_lshr_zero0(<4 x i64> %x) {
; SSE-LABEL: combine_vec_lshr_trunc_lshr_zero0:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_trunc_lshr_zero0:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i64> %x, <i64 48, i64 48, i64 48, i64 48>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %3
}

define <4 x i32> @combine_vec_lshr_trunc_lshr_zero1(<4 x i64> %x) {
; SSE-LABEL: combine_vec_lshr_trunc_lshr_zero1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrlq $51, %xmm2
; SSE-NEXT: psrlq $50, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrlq $49, %xmm2
; SSE-NEXT: psrlq $48, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: psrld $27, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: psrld $25, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: psrld $26, %xmm1
; SSE-NEXT: psrld $24, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_trunc_lshr_zero1:
; AVX: # BB#0:
; AVX-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = lshr <4 x i64> %x, <i64 48, i64 49, i64 50, i64 51>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %2, <i32 24, i32 25, i32 26, i32 27>
  ret <4 x i32> %3
}

; fold (srl (shl x, c), c) -> (and x, cst2)
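; (e.g. (lshr (shl x, 2), 2) keeps only the low 30 bits, i.e. (and x, 1073741823) where 1073741823 == (1 << 30) - 1)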
define <4 x i32> @combine_vec_lshr_shl_mask0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_shl_mask0:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_shl_mask0:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1073741823,1073741823,1073741823,1073741823]
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %2 = lshr <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_lshr_shl_mask1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_shl_mask1:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_shl_mask1:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 2, i32 3, i32 4, i32 5>
  %2 = lshr <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
  ret <4 x i32> %2
}

; fold (srl (sra X, Y), 31) -> (srl X, 31)
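; (an arithmetic shift keeps the original sign bit in bit 31, so extracting bit 31 afterwards is the same as shifting X right by 31 directly)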
define <4 x i32> @combine_vec_lshr_ashr_sign(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_lshr_ashr_sign:
; SSE: # BB#0:
; SSE-NEXT: psrld $31, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_ashr_sign:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr <4 x i32> %x, %y
  %2 = lshr <4 x i32> %1, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %2
}

; fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
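; (below, the input is masked to at most bit 4, so ctlz is either 27 or 32 and the shift by 5 reduces to ((x & 16) >> 4) ^ 1, which is what the checks show)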
define <4 x i32> @combine_vec_lshr_lzcnt_bit0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lzcnt_bit0:
; SSE: # BB#0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: psrld $4, %xmm0
; SSE-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lzcnt_bit0:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $4, %xmm0, %xmm0
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %1, i1 0)
  %3 = lshr <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %3
}

define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lzcnt_bit1:
; SSE: # BB#0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: pshufb %xmm1, %xmm4
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlw $4, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pshufb %xmm1, %xmm3
; SSE-NEXT: pcmpeqb %xmm2, %xmm1
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: paddb %xmm3, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: pcmpeqb %xmm2, %xmm3
; SSE-NEXT: psrlw $8, %xmm3
; SSE-NEXT: pand %xmm1, %xmm3
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: paddw %xmm3, %xmm1
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: psrld $5, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lzcnt_bit1:
; AVX: # BB#0:
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm4
; AVX-NEXT: vpand %xmm1, %xmm4, %xmm1
; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm5
; AVX-NEXT: vpand %xmm5, %xmm2, %xmm2
; AVX-NEXT: vpshufb %xmm1, %xmm3, %xmm1
; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsrld $16, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsrld $5, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 4, i32 32, i32 64, i32 128>
  %2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %1, i1 0)
  %3 = lshr <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)

; fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
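; (narrowing the 'and' to <4 x i32> lets AVX2 mask the truncated shift amounts and use a single vpsrlvd)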
define <4 x i32> @combine_vec_lshr_trunc_and(<4 x i32> %x, <4 x i64> %y) {
; SSE-LABEL: combine_vec_lshr_trunc_and:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: psrldq {{.*#+}} xmm2 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: psrld %xmm2, %xmm3
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: psrlq $32, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm4
; SSE-NEXT: psrld %xmm2, %xmm4
; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld %xmm1, %xmm2
; SSE-NEXT: psrld %xmm3, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3],xmm0[4,5],xmm4[6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_trunc_and:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = and <4 x i64> %y, <i64 15, i64 255, i64 4095, i64 65535>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = lshr <4 x i32> %x, %2
  ret <4 x i32> %3
}