; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s

target triple = "x86_64-unknown-unknown"

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.
; Each function below extracts lane 0 from both vectors, applies one scalar
; fp operation, and re-inserts the result into %a.  The backend should
; select a single scalar SSE/AVX instruction with no extra insert/shuffle.

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

; Same pattern as above but with the operand roles swapped: the scalar
; result is inserted back into %b, so the destination register differs
; (SSE needs a trailing movaps; AVX just swaps the source operands).

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

; Two dependent scalar fp operations before the final insert: only the
; last value is re-inserted, so two scalar instructions (and no vector
; inserts) should be emitted.

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    subss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    divss %xmm1, %xmm2
; SSE-NEXT:    divss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.

; Packed fp operation followed by a shufflevector that keeps only lane 0
; of the result (remaining lanes come from %a): should fold to one
; scalar instruction.

define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; Same shufflevector pattern but with commuted packed operands and the
; passthrough lanes taken from %b.

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; The lane-0 insert expressed as a vector select with a constant mask
; (<false, true, ...> keeps lane 0 from the packed op result): should
; still fold to one scalar instruction.

define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

; Select-based variant with commuted packed operands and passthrough
; lanes taken from %b.

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}