; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
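;
; Each RUN line compiles the same IR at a different x86 SIMD feature level.
; The layered --check-prefix options (ALL, then SSE/AVX/AVX512, then the most
; specific prefix) let configurations share CHECK lines where codegen agrees.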

;
; vXf32 (accum)
;
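; The llvm.experimental.vector.reduce.fmul.* intrinsics below take a scalar
; accumulator (%a0) and a vector (%a1). Without fast-math flags the
; reduction is ordered, which is why every lane is extracted with a shuffle
; and multiplied into the result one mulss at a time.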

define float @test_v2f32(float %a0, <2 x float> %a1) {
; SSE2-LABEL: test_v2f32:
; SSE2: # %bb.0:
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float %a0, <2 x float> %a1)
  ret float %1
}

define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float %a0, <4 x float> %a1)
  ret float %1
}

define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2-LABEL: test_v8f32:
; SSE2: # %bb.0:
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm3
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8f32:
; SSE41: # %bb.0:
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm3
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float %a0, <8 x float> %a1)
  ret float %1
}

define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-LABEL: test_v16f32:
; SSE2: # %bb.0:
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm5
; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16f32:
; SSE41: # %bb.0:
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm5
; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm4, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm2[3,1,2,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float %a0, <16 x float> %a1)
  ret float %1
}

;
; vXf32 (one)
;
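; Same reductions, seeded with an accumulator of 1.0. Since 1.0 is the
; identity for fmul, the initial multiply folds away and only the vector
; lanes are multiplied.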

define float @test_v2f32_one(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32_one:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32_one:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32_one:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float 1.0, <2 x float> %a0)
  ret float %1
}

define float @test_v4f32_one(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_one:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32_one:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4f32_one:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float 1.0, <4 x float> %a0)
  ret float %1
}

define float @test_v8f32_one(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32_one:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8f32_one:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8f32_one:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f32_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float 1.0, <8 x float> %a0)
  ret float %1
}

define float @test_v16f32_one(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32_one:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16f32_one:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32_one:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulss %xmm0, %xmm2, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f32_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float 1.0, <16 x float> %a0)
  ret float %1
}

;
; vXf32 (undef)
;
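; Same reductions, seeded with an undef accumulator. The first multiply
; involves undef, which the compiler is free to fold; note the mulss/vmulss
; against a constant-pool operand ({{.*}}(%rip)) in place of a lane-0
; multiply below.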

define float @test_v2f32_undef(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32_undef:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float undef, <2 x float> %a0)
  ret float %1
}

define float @test_v4f32_undef(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4f32_undef:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float undef, <4 x float> %a0)
  ret float %1
}

define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8f32_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8f32_undef:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f32_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float undef, <8 x float> %a0)
  ret float %1
}

define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16f32_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32_undef:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulss %xmm0, %xmm2, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vmulss %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f32_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float undef, <16 x float> %a0)
  ret float %1
}

;
; vXf64 (accum)
;
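; f64 variants of the accumulator tests. With only two doubles per 128-bit
; register, a single unpckhpd (SSE) or vpermilpd (AVX) exposes the high
; lane, so the chain needs far fewer shuffles than the f32 version.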

define double @test_v2f64(double %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double %a0, <2 x double> %a1)
  ret double %1
}

define double @test_v4f64(double %a0, <4 x double> %a1) {
; SSE-LABEL: test_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double %a0, <4 x double> %a1)
  ret double %1
}

define double @test_v8f64(double %a0, <8 x double> %a1) {
; SSE-LABEL: test_v8f64:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double %a0, <8 x double> %a1)
  ret double %1
}

define double @test_v16f64(double %a0, <16 x double> %a1) {
; SSE-LABEL: test_v16f64:
; SSE: # %bb.0:
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: mulsd %xmm5, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
; SSE-NEXT: mulsd %xmm5, %xmm0
; SSE-NEXT: mulsd %xmm6, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
; SSE-NEXT: mulsd %xmm6, %xmm0
; SSE-NEXT: mulsd %xmm7, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
; SSE-NEXT: mulsd %xmm7, %xmm0
; SSE-NEXT: mulsd %xmm8, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm8 = xmm8[1,1]
; SSE-NEXT: mulsd %xmm8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm5, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm3[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm3, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm4, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm4[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm4, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm3
; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vextractf128 $1, %ymm2, %xmm1
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm1
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm1
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double %a0, <16 x double> %a1)
  ret double %1
}

;
; vXf64 (one)
;
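; f64 reductions seeded with the identity 1.0; as with the f32 tests, the
; initial multiply folds away and only the vector lanes are multiplied.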

define double @test_v2f64_one(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_one:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64_one:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double 1.0, <2 x double> %a0)
  ret double %1
}

define double @test_v4f64_one(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_one:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm2
; SSE-NEXT: mulsd %xmm1, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64_one:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f64_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double 1.0, <4 x double> %a0)
  ret double %1
}

define double @test_v8f64_one(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_one:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm4
; SSE-NEXT: mulsd %xmm1, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm4
; SSE-NEXT: mulsd %xmm2, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: mulsd %xmm2, %xmm4
; SSE-NEXT: mulsd %xmm3, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: mulsd %xmm3, %xmm4
; SSE-NEXT: movapd %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64_one:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm2
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f64_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double 1.0, <8 x double> %a0)
  ret double %1
}

define double @test_v16f64_one(<16 x double> %a0) {
; SSE-LABEL: test_v16f64_one:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm8
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: mulsd %xmm8, %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: mulsd %xmm5, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
; SSE-NEXT: mulsd %xmm5, %xmm0
; SSE-NEXT: mulsd %xmm6, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
; SSE-NEXT: mulsd %xmm6, %xmm0
; SSE-NEXT: mulsd %xmm7, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
; SSE-NEXT: mulsd %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64_one:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm4, %xmm0, %xmm4
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm4
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm0
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm4, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm3[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm3, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f64_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm2
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double 1.0, <16 x double> %a0)
  ret double %1
}

;
; vXf64 (undef)
;
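; These tests pass an undef start value, which lets the backend pick any
; accumulator; in the checks below the first multiply folds into a load of
; a constant pool value ({{.*}}(%rip)).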

define double @test_v2f64_undef(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_undef:
; SSE: # %bb.0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double undef, <2 x double> %a0)
  ret double %1
}

define double @test_v4f64_undef(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_undef:
; SSE: # %bb.0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f64_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double undef, <4 x double> %a0)
  ret double %1
}

define double @test_v8f64_undef(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_undef:
; SSE: # %bb.0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f64_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double undef, <8 x double> %a0)
  ret double %1
}

define double @test_v16f64_undef(<16 x double> %a0) {
; SSE-LABEL: test_v16f64_undef:
; SSE: # %bb.0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: mulsd %xmm5, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
; SSE-NEXT: mulsd %xmm5, %xmm0
; SSE-NEXT: mulsd %xmm6, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
; SSE-NEXT: mulsd %xmm6, %xmm0
; SSE-NEXT: mulsd %xmm7, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
; SSE-NEXT: mulsd %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm4, %xmm4
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm4
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm0
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm4, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm3[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm3, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f64_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm2, %xmm2
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double undef, <16 x double> %a0)
  ret double %1
}

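; Ordered fmul reduction intrinsic declarations, one per tested vector width.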
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float, <2 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float, <8 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float, <16 x float>)

declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double, <2 x double>)
declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double, <4 x double>)
declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double, <8 x double>)
declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double, <16 x double>)