; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck --check-prefix=AVX %s

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.

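; For example, a naive lowering of test_add_ss below could compute the sum
; and then re-insert it into the destination (illustrative sketch only, not
; actual backend output):
;   addss %xmm0, %xmm1
;   movss %xmm1, %xmm0   (redundant re-insert of element 0)
; The CHECK lines verify that a single scalar instruction is emitted instead.
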
define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: subss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: divss %xmm1, %xmm2
; SSE-NEXT: divss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.

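; For example, in insert_test_add_ss below the shuffle mask
; <i32 0, i32 5, i32 6, i32 7> takes element 0 from the packed fadd result
; and elements 1-3 from %a, which matches the semantics of a single addss.
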
define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

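; The insert_test3 and insert_test4 variants below express the same lane-0
; insertion through a vector select with a constant condition: a mask of
; <i1 false, i1 true, ...> takes element 0 from the packed op result (the
; false lane) and the remaining elements from the original vector.
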
define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}