; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s

target triple = "x86_64-unknown-unknown"
; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.

; Each test extracts element 0 of both vectors, applies one scalar fp op,
; and re-inserts the result into %a. The backend should select a single
; scalar SSE/AVX instruction (addss/subss/...) with no extra insert/shuffle.

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

; Commuted variants of the tests above: the scalar result is inserted back
; into %b instead of %a, so SSE needs a trailing movaps to produce the
; return value in %xmm0, and AVX commutes the source operands.

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

; Two chained scalar ops on element 0; only the final result is inserted
; back, so both ops should still be selected as scalar instructions.

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    subss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    divss %xmm1, %xmm2
; SSE-NEXT:    divss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; With SSE4.1 or greater, the shuffles in the following tests may
; be lowered to X86Blendi nodes.

; Scalar op on element 0 blended back into %a via an insert-into-undef
; plus a shufflevector (lowered as a blend with SSE4.1+); should still
; fold to a single scalar instruction.

define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fadd float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fsub float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fmul float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fdiv float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fadd double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fsub double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fmul double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fdiv double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.

; Packed fp op whose lanes 1..N-1 are discarded by a shuffle that re-takes
; them from %a — equivalent to a scalar op on element 0 inserted into %a.

define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; Commuted packed-op-plus-shuffle variants: the result merges into %b,
; so SSE needs a trailing movaps and AVX commutes the operands.

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; Same fold expressed with a vector select instead of a shufflevector:
; lane 0 comes from the packed op, the remaining lanes from %a.

define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

; Select-based variants with commuted operands: the result merges into %b,
; so SSE needs a trailing movaps and AVX commutes the operands.

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}