; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
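;
; These tests cover DAGCombiner folds of vector shift-left (shl) patterns and the
; resulting SSE4.1 and AVX2 code generation.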

; fold (shl 0, x) -> 0
define <4 x i32> @combine_vec_shl_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_zero:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zero:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsllvd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> zeroinitializer, %x
  ret <4 x i32> %1
}

; fold (shl x, c >= size(x)) -> undef
define <4 x i32> @combine_vec_shl_outofrange0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_outofrange0:
; SSE: # BB#0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_outofrange0:
; AVX: # BB#0:
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_shl_outofrange1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_outofrange1:
; SSE: # BB#0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_outofrange1:
; AVX: # BB#0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 33, i32 34, i32 35, i32 36>
  ret <4 x i32> %1
}

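; The shift amounts are still out of range when the shifted value has been
; masked by an 'and', so this should fold away as well.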
define <4 x i32> @combine_vec_shl_outofrange2(<4 x i32> %a0) {
; SSE-LABEL: combine_vec_shl_outofrange2:
; SSE: # BB#0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_outofrange2:
; AVX: # BB#0:
; AVX-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %2 = shl <4 x i32> %1, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %2
}

; fold (shl x, 0) -> x
define <4 x i32> @combine_vec_shl_by_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_by_zero:
; SSE: # BB#0:
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_by_zero:
; AVX: # BB#0:
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, zeroinitializer
  ret <4 x i32> %1
}

; if (shl x, c) is known to be zero, return 0
define <4 x i32> @combine_vec_shl_known_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_known_zero0:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_known_zero0:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
  %2 = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_known_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_known_zero1:
; SSE: # BB#0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_known_zero1:
; AVX: # BB#0:
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 8589803520, i32 17179607040, i32 34359214080>
  %2 = shl <4 x i32> %1, <i32 16, i32 15, i32 14, i32 13>
  ret <4 x i32> %2
}

; fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
define <4 x i32> @combine_vec_shl_trunc_and(<4 x i32> %x, <4 x i64> %y) {
; SSE-LABEL: combine_vec_shl_trunc_and:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: pslld $23, %xmm1
; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
; SSE-NEXT: pmulld %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_trunc_and:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = and <4 x i64> %y, <i64 15, i64 255, i64 4095, i64 65535>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shl <4 x i32> %x, %2
  ret <4 x i32> %3
}

; fold (shl (shl x, c1), c2) -> (shl x, (add c1, c2))
define <4 x i32> @combine_vec_shl_shl0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl0:
; SSE: # BB#0:
; SSE-NEXT: pslld $6, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl0:
; AVX: # BB#0:
; AVX-NEXT: vpslld $6, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %2 = shl <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_shl1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl1:
; SSE: # BB#0:
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl1:
; AVX: # BB#0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 0, i32 1, i32 2, i32 3>
  %2 = shl <4 x i32> %1, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %2
}

; fold (shl (shl x, c1), c2) -> 0 if (add c1, c2) >= size(x)
define <4 x i32> @combine_vec_shl_shl_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl_zero0:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl_zero0:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %2 = shl <4 x i32> %1, <i32 20, i32 20, i32 20, i32 20>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_shl_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl_zero1:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl_zero1:
; AVX: # BB#0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shl <4 x i32> %x, <i32 17, i32 18, i32 19, i32 20>
  %2 = shl <4 x i32> %1, <i32 25, i32 26, i32 27, i32 28>
  ret <4 x i32> %2
}

; fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
define <8 x i32> @combine_vec_shl_ext_shl0(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_ext_shl0:
; SSE: # BB#0:
; SSE-NEXT: pmovsxwd %xmm0, %xmm2
; SSE-NEXT: pslld $20, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: pmovsxwd %xmm0, %xmm1
; SSE-NEXT: pslld $20, %xmm1
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ext_shl0:
; AVX: # BB#0:
; AVX-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX-NEXT: vpslld $20, %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = shl <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <8 x i32> %3
}

define <8 x i32> @combine_vec_shl_ext_shl1(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_ext_shl1:
; SSE: # BB#0:
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pmovsxwd %xmm1, %xmm1
; SSE-NEXT: pmovsxwd %xmm0, %xmm0
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ext_shl1:
; AVX: # BB#0:
; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = shl <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 31, i32 31, i32 30, i32 30, i32 29, i32 29, i32 28, i32 28>
  ret <8 x i32> %3
}

; fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_zext_lshr0:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pand {{.*}}(%rip), %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr0:
; AVX: # BB#0:
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
  %1 = lshr <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %3
}

define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_zext_lshr1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $4, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6],xmm1[7]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrlw $2, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2],xmm0[3,4],xmm2[5,6],xmm0[7]
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr1:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,3,4,5,6,7,8]
; AVX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = lshr <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  ret <8 x i32> %3
}

; fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
define <4 x i32> @combine_vec_shl_ge_ashr_exact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ge_ashr_exact0:
; SSE: # BB#0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_exact0:
; AVX: # BB#0:
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ge_ashr_exact1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ge_ashr_exact1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $8, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $4, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $5, %xmm1
; SSE-NEXT: psrad $3, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_exact1:
; AVX: # BB#0:
; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 > C2
define <4 x i32> @combine_vec_shl_lt_ashr_exact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_lt_ashr_exact0:
; SSE: # BB#0:
; SSE-NEXT: psrad $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_exact0:
; AVX: # BB#0:
; AVX-NEXT: vpsrad $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_lt_ashr_exact1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_lt_ashr_exact1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $8, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $6, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $7, %xmm1
; SSE-NEXT: psrad $5, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_exact1:
; AVX: # BB#0:
; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if C2 > C1
define <4 x i32> @combine_vec_shl_gt_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_gt_lshr0:
; SSE: # BB#0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr0:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_gt_lshr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_gt_lshr1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $8, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $4, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $5, %xmm1
; SSE-NEXT: psrld $3, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr1:
; AVX: # BB#0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
define <4 x i32> @combine_vec_shl_le_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_le_lshr0:
; SSE: # BB#0:
; SSE-NEXT: psrld $2, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_le_lshr0:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpsrld $2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_le_lshr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_le_lshr1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $8, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $6, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $7, %xmm1
; SSE-NEXT: psrld $5, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_le_lshr1:
; AVX: # BB#0:
; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
define <4 x i32> @combine_vec_shl_ashr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr0:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ashr0:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ashr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr1:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ashr1:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
define <4 x i32> @combine_vec_shl_add0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_add0:
; SSE: # BB#0:
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_add0:
; AVX: # BB#0:
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = add <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_add1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_add1:
; SSE: # BB#0:
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_add1:
; AVX: # BB#0:
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = add <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}

; FIXME: fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
define <4 x i32> @combine_vec_shl_or0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_or0:
; SSE: # BB#0:
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: pslld $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_or0:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = or <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_or1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_or1:
; SSE: # BB#0:
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_or1:
; AVX: # BB#0:
; AVX-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = or <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}

; fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
define <4 x i32> @combine_vec_shl_mul0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_mul0:
; SSE: # BB#0:
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_mul0:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = mul <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_mul1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_mul1:
; SSE: # BB#0:
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_mul1:
; AVX: # BB#0:
; AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = mul <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}