; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

; fold (shl 0, x) -> 0
define <4 x i32> @combine_vec_shl_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_zero:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zero:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsllvd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> zeroinitializer, %x
  ret <4 x i32> %1
}

; fold (shl x, c >= size(x)) -> undef
define <4 x i32> @combine_vec_shl_outofrange0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_outofrange0:
; SSE: # BB#0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_outofrange0:
; AVX: # BB#0:
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_shl_outofrange1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_outofrange1:
; SSE: # BB#0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_outofrange1:
; AVX: # BB#0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 33, i32 34, i32 35, i32 36>
  ret <4 x i32> %1
}

; fold (shl x, 0) -> x
define <4 x i32> @combine_vec_shl_by_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_by_zero:
; SSE: # BB#0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_by_zero:
; AVX: # BB#0:
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, zeroinitializer
  ret <4 x i32> %1
}

; if (shl x, c) is known to be zero, return 0
define <4 x i32> @combine_vec_shl_known_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_known_zero0:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_known_zero0:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
; AVX-NEXT: vpslld $16, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
  %2 = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_known_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_known_zero1:
; SSE: # BB#0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_known_zero1:
; AVX: # BB#0:
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 8589803520, i32 17179607040, i32 34359214080>
  %2 = shl <4 x i32> %1, <i32 16, i32 15, i32 14, i32 13>
  ret <4 x i32> %2
}

; fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
define <4 x i32> @combine_vec_shl_trunc_and(<4 x i32> %x, <4 x i64> %y) {
; SSE-LABEL: combine_vec_shl_trunc_and:
; SSE: # BB#0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm1
; SSE-NEXT: pand {{.*}}(%rip), %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pslld $23, %xmm1
; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
; SSE-NEXT: pmulld %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_trunc_and:
; AVX: # BB#0:
; AVX-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = and <4 x i64> %y, <i64 15, i64 255, i64 4095, i64 65535>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shl <4 x i32> %x, %2
  ret <4 x i32> %3
}

; fold (shl (shl x, c1), c2) -> (shl x, (add c1, c2))
define <4 x i32> @combine_vec_shl_shl0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl0:
; SSE: # BB#0:
; SSE-NEXT: pslld $6, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl0:
; AVX: # BB#0:
; AVX-NEXT: vpslld $6, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %2 = shl <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_shl1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl1:
; SSE: # BB#0:
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl1:
; AVX: # BB#0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 0, i32 1, i32 2, i32 3>
  %2 = shl <4 x i32> %1, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %2
}

; fold (shl (shl x, c1), c2) -> 0 if (c1 + c2) >= size(x)
define <4 x i32> @combine_vec_shl_shlr_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shlr_zero0:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shlr_zero0:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %2 = shl <4 x i32> %1, <i32 20, i32 20, i32 20, i32 20>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_shl_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl_zero1:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl_zero1:
; AVX: # BB#0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 17, i32 18, i32 19, i32 20>
  %2 = shl <4 x i32> %1, <i32 25, i32 26, i32 27, i32 28>
  ret <4 x i32> %2
}

; fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
define <8 x i32> @combine_vec_shl_ext_shl0(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_ext_shl0:
; SSE: # BB#0:
; SSE-NEXT: pmovsxwd %xmm0, %xmm2
; SSE-NEXT: pslld $20, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: pmovsxwd %xmm0, %xmm1
; SSE-NEXT: pslld $20, %xmm1
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ext_shl0:
; AVX: # BB#0:
; AVX-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX-NEXT: vpslld $20, %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = shl <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <8 x i32> %3
}

define <8 x i32> @combine_vec_shl_ext_shl1(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_ext_shl1:
; SSE: # BB#0:
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pmovsxwd %xmm1, %xmm1
; SSE-NEXT: pmovsxwd %xmm0, %xmm0
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ext_shl1:
; AVX: # BB#0:
; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = shl <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 31, i32 31, i32 30, i32 30, i32 29, i32 29, i32 28, i32 28>
  ret <8 x i32> %3
}

; fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_zext_lshr0:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pand {{.*}}(%rip), %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr0:
; AVX: # BB#0:
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
  %1 = lshr <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %3
}

define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_zext_lshr1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $4, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6],xmm1[7]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrlw $2, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2],xmm0[3,4],xmm2[5,6],xmm0[7]
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr1:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,3,4,5,6,7,8]
; AVX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = lshr <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  ret <8 x i32> %3
}

; fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
define <4 x i32> @combine_vec_shl_ge_ashr_extact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ge_ashr_extact0:
; SSE: # BB#0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_extact0:
; AVX: # BB#0:
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ge_ashr_extact1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ge_ashr_extact1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $8, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $4, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $5, %xmm1
; SSE-NEXT: psrad $3, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_extact1:
; AVX: # BB#0:
; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 > C2
define <4 x i32> @combine_vec_shl_lt_ashr_extact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_lt_ashr_extact0:
; SSE: # BB#0:
; SSE-NEXT: psrad $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_extact0:
; AVX: # BB#0:
; AVX-NEXT: vpsrad $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_lt_ashr_extact1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_lt_ashr_extact1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $8, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $6, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $7, %xmm1
; SSE-NEXT: psrad $5, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_extact1:
; AVX: # BB#0:
; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if C2 > C1
define <4 x i32> @combine_vec_shl_gt_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_gt_lshr0:
; SSE: # BB#0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr0:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_gt_lshr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_gt_lshr1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $8, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $4, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $5, %xmm1
; SSE-NEXT: psrld $3, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr1:
; AVX: # BB#0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
define <4 x i32> @combine_vec_shl_le_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_le_lshr0:
; SSE: # BB#0:
; SSE-NEXT: psrld $2, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_le_lshr0:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpsrld $2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_le_lshr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_le_lshr1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $8, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $6, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $7, %xmm1
; SSE-NEXT: psrld $5, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_le_lshr1:
; AVX: # BB#0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
define <4 x i32> @combine_vec_shl_ashr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr0:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ashr0:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ashr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $8, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $6, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $7, %xmm1
; SSE-NEXT: psrad $5, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ashr1:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,6,7,8]
; AVX-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
define <4 x i32> @combine_vec_shl_add0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_add0:
; SSE: # BB#0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_add0:
; AVX: # BB#0:
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = add <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_add1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_add1:
; SSE: # BB#0:
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_add1:
; AVX: # BB#0:
; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = add <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}

; fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
define <4 x i32> @combine_vec_shl_mul0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_mul0:
; SSE: # BB#0:
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_mul0:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = mul <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_mul1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_mul1:
; SSE: # BB#0:
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_mul1:
; AVX: # BB#0:
; AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = mul <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}