; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE-FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1,AVX1-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1,AVX1-FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX512,AVX512-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX512,AVX512-FAST

; Verify that we correctly fold horizontal binop even in the presence of UNDEFs.
; Only lanes 0 (from %a) and 3 (from %b) are defined, so a single phaddd of the
; low 128-bit halves suffices; AVX2/AVX512 keep the full-width ymm form.
define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: test14_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: test14_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test14_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test14_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %vecext = extractelement <8 x i32> %a, i32 0
  %vecext1 = extractelement <8 x i32> %a, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <8 x i32> %b, i32 2
  %vecext3 = extractelement <8 x i32> %b, i32 3
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 3
  ret <8 x i32> %vecinit5
}

; Verify that we generate integer horizontal adds instead of two scalar adds
; followed by vector inserts.
; The two defined result lanes (0 and 6) straddle the 128-bit boundary, so the
; SSE/AVX1 lowerings fall back to scalar extract/add/insert; only the 256-bit
; integer hadd targets (AVX2/AVX512) fold this into a single vphaddd.
define <8 x i32> @test15_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: test15_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movd %xmm0, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movd %xmm3, %eax
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; SSE-NEXT:    movd %xmm0, %edx
; SSE-NEXT:    addl %eax, %edx
; SSE-NEXT:    movd %ecx, %xmm0
; SSE-NEXT:    movd %edx, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: test15_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vpextrd $1, %xmm0, %ecx
; AVX1-NEXT:    addl %eax, %ecx
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; AVX1-NEXT:    addl %eax, %edx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vmovd %edx, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test15_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test15_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %vecext = extractelement <8 x i32> %a, i32 0
  %vecext1 = extractelement <8 x i32> %a, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <8 x i32> %b, i32 4
  %vecext3 = extractelement <8 x i32> %b, i32 5
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 6
  ret <8 x i32> %vecinit5
}

; PR40243 variant: both defined result lanes (4 and 7) live in the high 128-bit
; halves, so AVX1 extracts the upper halves, hadds them, and reinserts.
define <8 x i32> @PR40243_alt(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: PR40243_alt:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR40243_alt:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR40243_alt:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR40243_alt:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %a4 = extractelement <8 x i32> %a, i32 4
  %a5 = extractelement <8 x i32> %a, i32 5
  %add4 = add i32 %a4, %a5
  %b6 = extractelement <8 x i32> %b, i32 6
  %b7 = extractelement <8 x i32> %b, i32 7
  %add7 = add i32 %b6, %b7
  %r4 = insertelement <8 x i32> undef, i32 %add4, i32 4
  %r = insertelement <8 x i32> %r4, i32 %add7, i32 7
  ret <8 x i32> %r
}

; Both pairwise adds come from the low half of %a; a single self-hadd of %a
; produces both defined result lanes (0 and 1).
define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: test16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: test16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test16_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %vecext = extractelement <8 x i32> %a, i32 0
  %vecext1 = extractelement <8 x i32> %a, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <8 x i32> %a, i32 2
  %vecext3 = extractelement <8 x i32> %a, i32 3
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1
  ret <8 x i32> %vecinit5
}

; v16i32 version of test16_undef: only the low two result lanes are defined,
; so even AVX512 narrows to a single 256-bit self-hadd.
define <16 x i32> @test16_v16i32_undef(<16 x i32> %a, <16 x i32> %b) {
; SSE-LABEL: test16_v16i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: test16_v16i32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test16_v16i32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test16_v16i32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %vecext = extractelement <16 x i32> %a, i32 0
  %vecext1 = extractelement <16 x i32> %a, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <16 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <16 x i32> %a, i32 2
  %vecext3 = extractelement <16 x i32> %a, i32 3
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <16 x i32> %vecinit, i32 %add4, i32 1
  ret <16 x i32> %vecinit5
}

; All eight elements of %a are pairwise-added into the low four result lanes,
; so the 256-bit input folds to one 128-bit hadd of its two halves.
define <8 x i32> @test17_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: test17_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: test17_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test17_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test17_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %vecext = extractelement <8 x i32> %a, i32 0
  %vecext1 = extractelement <8 x i32> %a, i32 1
  %add1 = add i32 %vecext, %vecext1
  %vecinit1 = insertelement <8 x i32> undef, i32 %add1, i32 0
  %vecext2 = extractelement <8 x i32> %a, i32 2
  %vecext3 = extractelement <8 x i32> %a, i32 3
  %add2 = add i32 %vecext2, %vecext3
  %vecinit2 = insertelement <8 x i32> %vecinit1, i32 %add2, i32 1
  %vecext4 = extractelement <8 x i32> %a, i32 4
  %vecext5 = extractelement <8 x i32> %a, i32 5
  %add3 = add i32 %vecext4, %vecext5
  %vecinit3 = insertelement <8 x i32> %vecinit2, i32 %add3, i32 2
  %vecext6 = extractelement <8 x i32> %a, i32 6
  %vecext7 = extractelement <8 x i32> %a, i32 7
  %add4 = add i32 %vecext6, %vecext7
  %vecinit4 = insertelement <8 x i32> %vecinit3, i32 %add4, i32 3
  ret <8 x i32> %vecinit4
}

; v16i32 version of test17_undef: pairwise adds of the low eight elements of %a
; land in the low four result lanes.
define <16 x i32> @test17_v16i32_undef(<16 x i32> %a, <16 x i32> %b) {
; SSE-LABEL: test17_v16i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: test17_v16i32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test17_v16i32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test17_v16i32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %vecext = extractelement <16 x i32> %a, i32 0
  %vecext1 = extractelement <16 x i32> %a, i32 1
  %add1 = add i32 %vecext, %vecext1
  %vecinit1 = insertelement <16 x i32> undef, i32 %add1, i32 0
  %vecext2 = extractelement <16 x i32> %a, i32 2
  %vecext3 = extractelement <16 x i32> %a, i32 3
  %add2 = add i32 %vecext2, %vecext3
  %vecinit2 = insertelement <16 x i32> %vecinit1, i32 %add2, i32 1
  %vecext4 = extractelement <16 x i32> %a, i32 4
  %vecext5 = extractelement <16 x i32> %a, i32 5
  %add3 = add i32 %vecext4, %vecext5
  %vecinit3 = insertelement <16 x i32> %vecinit2, i32 %add3, i32 2
  %vecext6 = extractelement <16 x i32> %a, i32 6
  %vecext7 = extractelement <16 x i32> %a, i32 7
  %add4 = add i32 %vecext6, %vecext7
  %vecinit4 = insertelement <16 x i32> %vecinit3, i32 %add4, i32 3
  ret <16 x i32> %vecinit4
}
