blob: 3949a835e67a8e5e87efe5c9a65339af3bef018b [file] [log] [blame]
Andrea Di Biagiof7c33c82013-12-10 15:22:48 +00001; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
2; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
3; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s
4
5; Ensure that the backend no longer emits unnecessary vector insert
6; instructions immediately after SSE scalar fp instructions
7; like addss or mulss.
8
9
; Lane-0 scalar float add whose result is written back into %a:
;   result = < a[0] + b[0], a[1], a[2], a[3] >
; The checks that follow expect one addss/vaddss and no movss before the ret.
define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
  %b0 = extractelement <4 x float> %b, i32 0
  %a0 = extractelement <4 x float> %a, i32 0
  %sum = fadd float %a0, %b0
  %vec = insertelement <4 x float> %a, float %sum, i32 0
  ret <4 x float> %vec
}
17
18; CHECK-LABEL: test_add_ss
19; SSE2: addss %xmm1, %xmm0
20; AVX: vaddss %xmm1, %xmm0, %xmm0
21; CHECK-NOT: movss
22; CHECK: ret
23
24
; Lane-0 scalar float subtract whose result is written back into %a:
;   result = < a[0] - b[0], a[1], a[2], a[3] >
; The checks that follow expect one subss/vsubss and no movss before the ret.
define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
  %b0 = extractelement <4 x float> %b, i32 0
  %a0 = extractelement <4 x float> %a, i32 0
  %diff = fsub float %a0, %b0
  %vec = insertelement <4 x float> %a, float %diff, i32 0
  ret <4 x float> %vec
}
32
33; CHECK-LABEL: test_sub_ss
34; SSE2: subss %xmm1, %xmm0
35; AVX: vsubss %xmm1, %xmm0, %xmm0
36; CHECK-NOT: movss
37; CHECK: ret
38
; Lane-0 scalar float multiply whose result is written back into %a:
;   result = < a[0] * b[0], a[1], a[2], a[3] >
; The checks that follow expect one mulss/vmulss and no movss before the ret.
define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
  %b0 = extractelement <4 x float> %b, i32 0
  %a0 = extractelement <4 x float> %a, i32 0
  %prod = fmul float %a0, %b0
  %vec = insertelement <4 x float> %a, float %prod, i32 0
  ret <4 x float> %vec
}
46
47; CHECK-LABEL: test_mul_ss
48; SSE2: mulss %xmm1, %xmm0
49; AVX: vmulss %xmm1, %xmm0, %xmm0
50; CHECK-NOT: movss
51; CHECK: ret
52
53
; Lane-0 scalar float divide whose result is written back into %a:
;   result = < a[0] / b[0], a[1], a[2], a[3] >
; The checks that follow expect one divss/vdivss and no movss before the ret.
define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
  %b0 = extractelement <4 x float> %b, i32 0
  %a0 = extractelement <4 x float> %a, i32 0
  %quot = fdiv float %a0, %b0
  %vec = insertelement <4 x float> %a, float %quot, i32 0
  ret <4 x float> %vec
}
61
62; CHECK-LABEL: test_div_ss
63; SSE2: divss %xmm1, %xmm0
64; AVX: vdivss %xmm1, %xmm0, %xmm0
65; CHECK-NOT: movss
66; CHECK: ret
67
68
; Lane-0 scalar double add whose result is written back into %a:
;   result = < a[0] + b[0], a[1] >
; The checks that follow expect one addsd/vaddsd and no movsd before the ret.
define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
  %b0 = extractelement <2 x double> %b, i32 0
  %a0 = extractelement <2 x double> %a, i32 0
  %sum = fadd double %a0, %b0
  %vec = insertelement <2 x double> %a, double %sum, i32 0
  ret <2 x double> %vec
}
76
77; CHECK-LABEL: test_add_sd
78; SSE2: addsd %xmm1, %xmm0
79; AVX: vaddsd %xmm1, %xmm0, %xmm0
80; CHECK-NOT: movsd
81; CHECK: ret
82
83
; Lane-0 scalar double subtract whose result is written back into %a:
;   result = < a[0] - b[0], a[1] >
; The checks that follow expect one subsd/vsubsd and no movsd before the ret.
define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
  %b0 = extractelement <2 x double> %b, i32 0
  %a0 = extractelement <2 x double> %a, i32 0
  %diff = fsub double %a0, %b0
  %vec = insertelement <2 x double> %a, double %diff, i32 0
  ret <2 x double> %vec
}
91
92; CHECK-LABEL: test_sub_sd
93; SSE2: subsd %xmm1, %xmm0
94; AVX: vsubsd %xmm1, %xmm0, %xmm0
95; CHECK-NOT: movsd
96; CHECK: ret
97
98
; Lane-0 scalar double multiply whose result is written back into %a:
;   result = < a[0] * b[0], a[1] >
; The checks that follow expect one mulsd/vmulsd and no movsd before the ret.
define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
  %b0 = extractelement <2 x double> %b, i32 0
  %a0 = extractelement <2 x double> %a, i32 0
  %prod = fmul double %a0, %b0
  %vec = insertelement <2 x double> %a, double %prod, i32 0
  ret <2 x double> %vec
}
106
107; CHECK-LABEL: test_mul_sd
108; SSE2: mulsd %xmm1, %xmm0
109; AVX: vmulsd %xmm1, %xmm0, %xmm0
110; CHECK-NOT: movsd
111; CHECK: ret
112
113
; Lane-0 scalar double divide whose result is written back into %a:
;   result = < a[0] / b[0], a[1] >
; The checks that follow expect one divsd/vdivsd and no movsd before the ret.
define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
  %b0 = extractelement <2 x double> %b, i32 0
  %a0 = extractelement <2 x double> %a, i32 0
  %quot = fdiv double %a0, %b0
  %vec = insertelement <2 x double> %a, double %quot, i32 0
  ret <2 x double> %vec
}
121
122; CHECK-LABEL: test_div_sd
123; SSE2: divsd %xmm1, %xmm0
124; AVX: vdivsd %xmm1, %xmm0, %xmm0
125; CHECK-NOT: movsd
126; CHECK: ret
127
128
; Lane-0 scalar float add whose result is inserted into the second argument:
;   result = < a[0] + b[0], b[1], b[2], b[3] >
; The checks that follow expect the add to target %xmm1 (SSE form) with no
; movss between it and the ret.
define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
  %a0 = extractelement <4 x float> %a, i32 0
  %b0 = extractelement <4 x float> %b, i32 0
  %sum = fadd float %a0, %b0
  %vec = insertelement <4 x float> %b, float %sum, i32 0
  ret <4 x float> %vec
}
136
137; CHECK-LABEL: test2_add_ss
138; SSE2: addss %xmm0, %xmm1
139; AVX: vaddss %xmm0, %xmm1, %xmm0
140; CHECK-NOT: movss
141; CHECK: ret
142
143
; Lane-0 scalar float subtract whose result is inserted into the second
; argument; note the minuend is b[0]:
;   result = < b[0] - a[0], b[1], b[2], b[3] >
; The checks that follow expect the subtract to target %xmm1 (SSE form) with
; no movss between it and the ret.
define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
  %a0 = extractelement <4 x float> %a, i32 0
  %b0 = extractelement <4 x float> %b, i32 0
  %diff = fsub float %b0, %a0
  %vec = insertelement <4 x float> %b, float %diff, i32 0
  ret <4 x float> %vec
}
151
152; CHECK-LABEL: test2_sub_ss
153; SSE2: subss %xmm0, %xmm1
154; AVX: vsubss %xmm0, %xmm1, %xmm0
155; CHECK-NOT: movss
156; CHECK: ret
157
158
; Lane-0 scalar float multiply whose result is inserted into the second
; argument:
;   result = < a[0] * b[0], b[1], b[2], b[3] >
; The checks that follow expect the multiply to target %xmm1 (SSE form) with
; no movss between it and the ret.
define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
  %a0 = extractelement <4 x float> %a, i32 0
  %b0 = extractelement <4 x float> %b, i32 0
  %prod = fmul float %a0, %b0
  %vec = insertelement <4 x float> %b, float %prod, i32 0
  ret <4 x float> %vec
}
166
167; CHECK-LABEL: test2_mul_ss
168; SSE2: mulss %xmm0, %xmm1
169; AVX: vmulss %xmm0, %xmm1, %xmm0
170; CHECK-NOT: movss
171; CHECK: ret
172
173
; Lane-0 scalar float divide whose result is inserted into the second
; argument; note the dividend is b[0]:
;   result = < b[0] / a[0], b[1], b[2], b[3] >
; The checks that follow expect the divide to target %xmm1 (SSE form) with
; no movss between it and the ret.
define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
  %a0 = extractelement <4 x float> %a, i32 0
  %b0 = extractelement <4 x float> %b, i32 0
  %quot = fdiv float %b0, %a0
  %vec = insertelement <4 x float> %b, float %quot, i32 0
  ret <4 x float> %vec
}
181
182; CHECK-LABEL: test2_div_ss
183; SSE2: divss %xmm0, %xmm1
184; AVX: vdivss %xmm0, %xmm1, %xmm0
185; CHECK-NOT: movss
186; CHECK: ret
187
188
; Lane-0 scalar double add whose result is inserted into the second argument:
;   result = < a[0] + b[0], b[1] >
; The checks that follow expect the add to target %xmm1 (SSE form) with no
; movsd between it and the ret.
define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
  %a0 = extractelement <2 x double> %a, i32 0
  %b0 = extractelement <2 x double> %b, i32 0
  %sum = fadd double %a0, %b0
  %vec = insertelement <2 x double> %b, double %sum, i32 0
  ret <2 x double> %vec
}
196
197; CHECK-LABEL: test2_add_sd
198; SSE2: addsd %xmm0, %xmm1
199; AVX: vaddsd %xmm0, %xmm1, %xmm0
200; CHECK-NOT: movsd
201; CHECK: ret
202
203
; Lane-0 scalar double subtract whose result is inserted into the second
; argument; note the minuend is b[0]:
;   result = < b[0] - a[0], b[1] >
; The checks that follow expect the subtract to target %xmm1 (SSE form) with
; no movsd between it and the ret.
define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
  %a0 = extractelement <2 x double> %a, i32 0
  %b0 = extractelement <2 x double> %b, i32 0
  %diff = fsub double %b0, %a0
  %vec = insertelement <2 x double> %b, double %diff, i32 0
  ret <2 x double> %vec
}
211
212; CHECK-LABEL: test2_sub_sd
213; SSE2: subsd %xmm0, %xmm1
214; AVX: vsubsd %xmm0, %xmm1, %xmm0
215; CHECK-NOT: movsd
216; CHECK: ret
217
218
; Lane-0 scalar double multiply whose result is inserted into the second
; argument:
;   result = < a[0] * b[0], b[1] >
; The checks that follow expect the multiply to target %xmm1 (SSE form) with
; no movsd between it and the ret.
define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
  %a0 = extractelement <2 x double> %a, i32 0
  %b0 = extractelement <2 x double> %b, i32 0
  %prod = fmul double %a0, %b0
  %vec = insertelement <2 x double> %b, double %prod, i32 0
  ret <2 x double> %vec
}
226
227; CHECK-LABEL: test2_mul_sd
228; SSE2: mulsd %xmm0, %xmm1
229; AVX: vmulsd %xmm0, %xmm1, %xmm0
230; CHECK-NOT: movsd
231; CHECK: ret
232
233
; Lane-0 scalar double divide whose result is inserted into the second
; argument; note the dividend is b[0]:
;   result = < b[0] / a[0], b[1] >
; The checks that follow expect the divide to target %xmm1 (SSE form) with
; no movsd between it and the ret.
define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
  %a0 = extractelement <2 x double> %a, i32 0
  %b0 = extractelement <2 x double> %b, i32 0
  %quot = fdiv double %b0, %a0
  %vec = insertelement <2 x double> %b, double %quot, i32 0
  ret <2 x double> %vec
}
241
242; CHECK-LABEL: test2_div_sd
243; SSE2: divsd %xmm0, %xmm1
244; AVX: vdivsd %xmm0, %xmm1, %xmm0
245; CHECK-NOT: movsd
246; CHECK: ret
247
248
; Two dependent lane-0 float adds ahead of a single insert into %a:
;   result = < a[0] + (a[0] + b[0]), a[1], a[2], a[3] >
; The checks that follow expect two addss matches and no movss between the
; second one and the ret.
define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
  %b0 = extractelement <4 x float> %b, i32 0
  %a0 = extractelement <4 x float> %a, i32 0
  %inner = fadd float %a0, %b0
  %outer = fadd float %a0, %inner
  %vec = insertelement <4 x float> %a, float %outer, i32 0
  ret <4 x float> %vec
}
257
258; CHECK-LABEL: test_multiple_add_ss
259; CHECK: addss
260; CHECK: addss
261; CHECK-NOT: movss
262; CHECK: ret
263
264
; Two dependent lane-0 float subtracts ahead of a single insert into %a:
;   result = < a[0] - (a[0] - b[0]), a[1], a[2], a[3] >
; The checks that follow expect two subss matches and no movss between the
; second one and the ret.
define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
  %b0 = extractelement <4 x float> %b, i32 0
  %a0 = extractelement <4 x float> %a, i32 0
  %inner = fsub float %a0, %b0
  %outer = fsub float %a0, %inner
  %vec = insertelement <4 x float> %a, float %outer, i32 0
  ret <4 x float> %vec
}
273
274; CHECK-LABEL: test_multiple_sub_ss
275; CHECK: subss
276; CHECK: subss
277; CHECK-NOT: movss
278; CHECK: ret
279
280
; Two dependent lane-0 float multiplies ahead of a single insert into %a:
;   result = < a[0] * (a[0] * b[0]), a[1], a[2], a[3] >
; The checks that follow expect two mulss matches and no movss between the
; second one and the ret.
define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
  %b0 = extractelement <4 x float> %b, i32 0
  %a0 = extractelement <4 x float> %a, i32 0
  %inner = fmul float %a0, %b0
  %outer = fmul float %a0, %inner
  %vec = insertelement <4 x float> %a, float %outer, i32 0
  ret <4 x float> %vec
}
289
290; CHECK-LABEL: test_multiple_mul_ss
291; CHECK: mulss
292; CHECK: mulss
293; CHECK-NOT: movss
294; CHECK: ret
295
; Two dependent lane-0 float divides ahead of a single insert into %a:
;   result = < a[0] / (a[0] / b[0]), a[1], a[2], a[3] >
; The checks that follow expect two divss matches and no movss between the
; second one and the ret.
define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
  %b0 = extractelement <4 x float> %b, i32 0
  %a0 = extractelement <4 x float> %a, i32 0
  %inner = fdiv float %a0, %b0
  %outer = fdiv float %a0, %inner
  %vec = insertelement <4 x float> %a, float %outer, i32 0
  ret <4 x float> %vec
}
304
305; CHECK-LABEL: test_multiple_div_ss
306; CHECK: divss
307; CHECK: divss
308; CHECK-NOT: movss
309; CHECK: ret
310