blob: 52e6b61aedfe850ba05bbdda5f5f1d525728afb9 [file] [log] [blame]
Simon Pilgrim93986492017-04-18 19:04:40 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
11
; Packed single-precision add. The IR adds %a0 and %a1, then adds a
; <4 x float> loaded from %a2 (align 16), so each expected sequence below
; has one register-operand add followed by one add that reads (%rdi).
12define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
13; GENERIC-LABEL: test_addps:
14; GENERIC: # BB#0:
15; GENERIC-NEXT: addps %xmm1, %xmm0
16; GENERIC-NEXT: addps (%rdi), %xmm0
17; GENERIC-NEXT: retq
18;
19; ATOM-LABEL: test_addps:
20; ATOM: # BB#0:
21; ATOM-NEXT: addps %xmm1, %xmm0
22; ATOM-NEXT: addps (%rdi), %xmm0
23; ATOM-NEXT: retq
24;
25; SLM-LABEL: test_addps:
26; SLM: # BB#0:
27; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
28; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00]
29; SLM-NEXT: retq # sched: [4:1.00]
30;
31; SANDY-LABEL: test_addps:
32; SANDY: # BB#0:
33; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
34; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
35; SANDY-NEXT: retq # sched: [5:1.00]
36;
37; HASWELL-LABEL: test_addps:
38; HASWELL: # BB#0:
39; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
40; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
41; HASWELL-NEXT: retq # sched: [1:1.00]
42;
43; BTVER2-LABEL: test_addps:
44; BTVER2: # BB#0:
45; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
46; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
47; BTVER2-NEXT: retq # sched: [4:1.00]
48 %1 = fadd <4 x float> %a0, %a1
49 %2 = load <4 x float>, <4 x float> *%a2, align 16
50 %3 = fadd <4 x float> %1, %2
51 ret <4 x float> %3
52}
53
; Scalar single-precision add. The IR adds the two float arguments, then
; adds a float loaded from %a2 (align 4), so each expected sequence below
; has one register-operand add followed by one add that reads (%rdi).
54define float @test_addss(float %a0, float %a1, float *%a2) {
55; GENERIC-LABEL: test_addss:
56; GENERIC: # BB#0:
57; GENERIC-NEXT: addss %xmm1, %xmm0
58; GENERIC-NEXT: addss (%rdi), %xmm0
59; GENERIC-NEXT: retq
60;
61; ATOM-LABEL: test_addss:
62; ATOM: # BB#0:
63; ATOM-NEXT: addss %xmm1, %xmm0
64; ATOM-NEXT: addss (%rdi), %xmm0
65; ATOM-NEXT: retq
66;
67; SLM-LABEL: test_addss:
68; SLM: # BB#0:
69; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
70; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00]
71; SLM-NEXT: retq # sched: [4:1.00]
72;
73; SANDY-LABEL: test_addss:
74; SANDY: # BB#0:
75; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
76; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
77; SANDY-NEXT: retq # sched: [5:1.00]
78;
79; HASWELL-LABEL: test_addss:
80; HASWELL: # BB#0:
81; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
82; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
83; HASWELL-NEXT: retq # sched: [1:1.00]
84;
85; BTVER2-LABEL: test_addss:
86; BTVER2: # BB#0:
87; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
88; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
89; BTVER2-NEXT: retq # sched: [4:1.00]
90 %1 = fadd float %a0, %a1
91 %2 = load float, float *%a2, align 4
92 %3 = fadd float %1, %2
93 ret float %3
94}
95
; Packed bitwise AND on float vectors. The IR bitcasts both arguments to
; <4 x i32>, ANDs them, ANDs the result with a vector loaded from %a2
; (align 16) and bitcasts back to <4 x float>. Each expected sequence has a
; register-operand and a memory-operand andps/vandps; the Atom expectation
; additionally lists eight nop instructions before retq.
96define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
97; GENERIC-LABEL: test_andps:
98; GENERIC: # BB#0:
99; GENERIC-NEXT: andps %xmm1, %xmm0
100; GENERIC-NEXT: andps (%rdi), %xmm0
101; GENERIC-NEXT: retq
102;
103; ATOM-LABEL: test_andps:
104; ATOM: # BB#0:
105; ATOM-NEXT: andps %xmm1, %xmm0
106; ATOM-NEXT: andps (%rdi), %xmm0
107; ATOM-NEXT: nop
108; ATOM-NEXT: nop
109; ATOM-NEXT: nop
110; ATOM-NEXT: nop
111; ATOM-NEXT: nop
112; ATOM-NEXT: nop
113; ATOM-NEXT: nop
114; ATOM-NEXT: nop
115; ATOM-NEXT: retq
116;
117; SLM-LABEL: test_andps:
118; SLM: # BB#0:
119; SLM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
120; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00]
121; SLM-NEXT: retq # sched: [4:1.00]
122;
123; SANDY-LABEL: test_andps:
124; SANDY: # BB#0:
125; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
126; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
127; SANDY-NEXT: retq # sched: [5:1.00]
128;
129; HASWELL-LABEL: test_andps:
130; HASWELL: # BB#0:
131; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
132; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
133; HASWELL-NEXT: retq # sched: [1:1.00]
134;
135; BTVER2-LABEL: test_andps:
136; BTVER2: # BB#0:
137; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
138; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
139; BTVER2-NEXT: retq # sched: [4:1.00]
140 %1 = bitcast <4 x float> %a0 to <4 x i32>
141 %2 = bitcast <4 x float> %a1 to <4 x i32>
142 %3 = and <4 x i32> %1, %2
143 %4 = load <4 x float>, <4 x float> *%a2, align 16
144 %5 = bitcast <4 x float> %4 to <4 x i32>
145 %6 = and <4 x i32> %3, %5
146 %7 = bitcast <4 x i32> %6 to <4 x float>
147 ret <4 x float> %7
148}
149
; Packed bitwise AND-NOT on float vectors. The IR computes (~%a0 & %a1) on
; the <4 x i32> bitcasts, then inverts that result and ANDs it with a
; vector loaded from %a2 (align 16). Each expected sequence has a
; register-operand and a memory-operand andnps/vandnps; the Atom
; expectation additionally lists eight nop instructions before retq.
150define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
151; GENERIC-LABEL: test_andnotps:
152; GENERIC: # BB#0:
153; GENERIC-NEXT: andnps %xmm1, %xmm0
154; GENERIC-NEXT: andnps (%rdi), %xmm0
155; GENERIC-NEXT: retq
156;
157; ATOM-LABEL: test_andnotps:
158; ATOM: # BB#0:
159; ATOM-NEXT: andnps %xmm1, %xmm0
160; ATOM-NEXT: andnps (%rdi), %xmm0
161; ATOM-NEXT: nop
162; ATOM-NEXT: nop
163; ATOM-NEXT: nop
164; ATOM-NEXT: nop
165; ATOM-NEXT: nop
166; ATOM-NEXT: nop
167; ATOM-NEXT: nop
168; ATOM-NEXT: nop
169; ATOM-NEXT: retq
170;
171; SLM-LABEL: test_andnotps:
172; SLM: # BB#0:
173; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
174; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00]
175; SLM-NEXT: retq # sched: [4:1.00]
176;
177; SANDY-LABEL: test_andnotps:
178; SANDY: # BB#0:
179; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
180; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
181; SANDY-NEXT: retq # sched: [5:1.00]
182;
183; HASWELL-LABEL: test_andnotps:
184; HASWELL: # BB#0:
185; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
186; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
187; HASWELL-NEXT: retq # sched: [1:1.00]
188;
189; BTVER2-LABEL: test_andnotps:
190; BTVER2: # BB#0:
191; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
192; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
193; BTVER2-NEXT: retq # sched: [4:1.00]
194 %1 = bitcast <4 x float> %a0 to <4 x i32>
195 %2 = bitcast <4 x float> %a1 to <4 x i32>
196 %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
197 %4 = and <4 x i32> %3, %2
198 %5 = load <4 x float>, <4 x float> *%a2, align 16
199 %6 = bitcast <4 x float> %5 to <4 x i32>
200 %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
201 %8 = and <4 x i32> %6, %7
202 %9 = bitcast <4 x i32> %8 to <4 x float>
203 ret <4 x float> %9
204}
205
; Packed single-precision compare. The IR performs two ordered-equal
; (oeq) compares of %a0, one against %a1 and one against a vector loaded
; from %a2 (align 16), ORs the two <4 x i1> masks, sign-extends to
; <4 x i32> and bitcasts to <4 x float>. Each expected sequence has a
; register-operand and a memory-operand cmpeqps/vcmpeqps joined by an
; orps/vorps.
206define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
207; GENERIC-LABEL: test_cmpps:
208; GENERIC: # BB#0:
209; GENERIC-NEXT: cmpeqps %xmm0, %xmm1
210; GENERIC-NEXT: cmpeqps (%rdi), %xmm0
211; GENERIC-NEXT: orps %xmm1, %xmm0
212; GENERIC-NEXT: retq
213;
214; ATOM-LABEL: test_cmpps:
215; ATOM: # BB#0:
216; ATOM-NEXT: cmpeqps %xmm0, %xmm1
217; ATOM-NEXT: cmpeqps (%rdi), %xmm0
218; ATOM-NEXT: orps %xmm1, %xmm0
219; ATOM-NEXT: retq
220;
221; SLM-LABEL: test_cmpps:
222; SLM: # BB#0:
223; SLM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
224; SLM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
225; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
226; SLM-NEXT: retq # sched: [4:1.00]
227;
228; SANDY-LABEL: test_cmpps:
229; SANDY: # BB#0:
230; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
231; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
232; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
233; SANDY-NEXT: retq # sched: [5:1.00]
234;
235; HASWELL-LABEL: test_cmpps:
236; HASWELL: # BB#0:
237; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
238; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
239; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
240; HASWELL-NEXT: retq # sched: [1:1.00]
241;
242; BTVER2-LABEL: test_cmpps:
243; BTVER2: # BB#0:
244; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
245; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
246; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
247; BTVER2-NEXT: retq # sched: [4:1.00]
248 %1 = fcmp oeq <4 x float> %a0, %a1
249 %2 = load <4 x float>, <4 x float> *%a2, align 16
250 %3 = fcmp oeq <4 x float> %a0, %2
251 %4 = or <4 x i1> %1, %3
252 %5 = sext <4 x i1> %4 to <4 x i32>
253 %6 = bitcast <4 x i32> %5 to <4 x float>
254 ret <4 x float> %6
255}
256
; Scalar single-precision compare through the llvm.x86.sse.cmp.ss
; intrinsic. Each float operand is placed in lane 0 of an otherwise undef
; <4 x float>; the intrinsic is called twice with an i8 0 immediate, the
; second time against a float loaded from %a2 (align 4), and lane 0 of the
; result is returned. Each expected sequence has a register-operand and a
; memory-operand cmpeqss/vcmpeqss.
257define float @test_cmpss(float %a0, float %a1, float *%a2) {
258; GENERIC-LABEL: test_cmpss:
259; GENERIC: # BB#0:
260; GENERIC-NEXT: cmpeqss %xmm1, %xmm0
261; GENERIC-NEXT: cmpeqss (%rdi), %xmm0
262; GENERIC-NEXT: retq
263;
264; ATOM-LABEL: test_cmpss:
265; ATOM: # BB#0:
266; ATOM-NEXT: cmpeqss %xmm1, %xmm0
267; ATOM-NEXT: cmpeqss (%rdi), %xmm0
268; ATOM-NEXT: retq
269;
270; SLM-LABEL: test_cmpss:
271; SLM: # BB#0:
272; SLM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
273; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
274; SLM-NEXT: retq # sched: [4:1.00]
275;
276; SANDY-LABEL: test_cmpss:
277; SANDY: # BB#0:
278; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
279; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
280; SANDY-NEXT: retq # sched: [5:1.00]
281;
282; HASWELL-LABEL: test_cmpss:
283; HASWELL: # BB#0:
284; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
285; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
286; HASWELL-NEXT: retq # sched: [1:1.00]
287;
288; BTVER2-LABEL: test_cmpss:
289; BTVER2: # BB#0:
290; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
291; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
292; BTVER2-NEXT: retq # sched: [4:1.00]
293 %1 = insertelement <4 x float> undef, float %a0, i32 0
294 %2 = insertelement <4 x float> undef, float %a1, i32 0
295 %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0)
296 %4 = load float, float *%a2, align 4
297 %5 = insertelement <4 x float> undef, float %4, i32 0
298 %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0)
299 %7 = extractelement <4 x float> %6, i32 0
300 ret float %7
301}
302declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
303
; Scalar ordered compare that sets flags, through the
; llvm.x86.sse.comieq.ss intrinsic. %a0 is compared against %a1 and against
; a <4 x float> loaded from %a2 (note the load is only align 4), and the
; two i32 results are ORed. Each expected sequence has a register-operand
; and a memory-operand comiss/vcomiss, each followed by a setnp/sete/andb
; triple, then an orb and a movzbl into %eax.
304define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
305; GENERIC-LABEL: test_comiss:
306; GENERIC: # BB#0:
307; GENERIC-NEXT: comiss %xmm1, %xmm0
308; GENERIC-NEXT: setnp %al
309; GENERIC-NEXT: sete %cl
310; GENERIC-NEXT: andb %al, %cl
311; GENERIC-NEXT: comiss (%rdi), %xmm0
312; GENERIC-NEXT: setnp %al
313; GENERIC-NEXT: sete %dl
314; GENERIC-NEXT: andb %al, %dl
315; GENERIC-NEXT: orb %cl, %dl
316; GENERIC-NEXT: movzbl %dl, %eax
317; GENERIC-NEXT: retq
318;
319; ATOM-LABEL: test_comiss:
320; ATOM: # BB#0:
321; ATOM-NEXT: comiss %xmm1, %xmm0
322; ATOM-NEXT: setnp %al
323; ATOM-NEXT: sete %cl
324; ATOM-NEXT: andb %al, %cl
325; ATOM-NEXT: comiss (%rdi), %xmm0
326; ATOM-NEXT: setnp %al
327; ATOM-NEXT: sete %dl
328; ATOM-NEXT: andb %al, %dl
329; ATOM-NEXT: orb %cl, %dl
330; ATOM-NEXT: movzbl %dl, %eax
331; ATOM-NEXT: retq
332;
333; SLM-LABEL: test_comiss:
334; SLM: # BB#0:
335; SLM-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
336; SLM-NEXT: setnp %al # sched: [1:0.50]
337; SLM-NEXT: sete %cl # sched: [1:0.50]
338; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
339; SLM-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00]
340; SLM-NEXT: setnp %al # sched: [1:0.50]
341; SLM-NEXT: sete %dl # sched: [1:0.50]
342; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
343; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
344; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
345; SLM-NEXT: retq # sched: [4:1.00]
346;
347; SANDY-LABEL: test_comiss:
348; SANDY: # BB#0:
349; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
350; SANDY-NEXT: setnp %al # sched: [1:0.33]
351; SANDY-NEXT: sete %cl # sched: [1:0.33]
352; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
353; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
354; SANDY-NEXT: setnp %al # sched: [1:0.33]
355; SANDY-NEXT: sete %dl # sched: [1:0.33]
356; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
357; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
358; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
359; SANDY-NEXT: retq # sched: [5:1.00]
360;
361; HASWELL-LABEL: test_comiss:
362; HASWELL: # BB#0:
363; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
364; HASWELL-NEXT: setnp %al # sched: [1:0.50]
365; HASWELL-NEXT: sete %cl # sched: [1:0.50]
366; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
367; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
368; HASWELL-NEXT: setnp %al # sched: [1:0.50]
369; HASWELL-NEXT: sete %dl # sched: [1:0.50]
370; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
371; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
372; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
373; HASWELL-NEXT: retq # sched: [1:1.00]
374;
375; BTVER2-LABEL: test_comiss:
376; BTVER2: # BB#0:
377; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
378; BTVER2-NEXT: setnp %al # sched: [1:0.50]
379; BTVER2-NEXT: sete %cl # sched: [1:0.50]
380; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
381; BTVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
382; BTVER2-NEXT: setnp %al # sched: [1:0.50]
383; BTVER2-NEXT: sete %dl # sched: [1:0.50]
384; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
385; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
386; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
387; BTVER2-NEXT: retq # sched: [4:1.00]
388 %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
389 %2 = load <4 x float>, <4 x float> *%a2, align 4
390 %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2)
391 %4 = or i32 %1, %3
392 ret i32 %4
393}
394declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
395
; Signed i32 to float conversion. The IR converts the %a0 argument and an
; i32 loaded from %a1 (align 4) with sitofp and adds the two floats. Each
; expected sequence has a register-source and a memory-source
; cvtsi2ssl/vcvtsi2ssl followed by an addss/vaddss; the order of the two
; conversions differs between the per-CPU expectations.
396define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
397; GENERIC-LABEL: test_cvtsi2ss:
398; GENERIC: # BB#0:
399; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1
400; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0
401; GENERIC-NEXT: addss %xmm1, %xmm0
402; GENERIC-NEXT: retq
403;
404; ATOM-LABEL: test_cvtsi2ss:
405; ATOM: # BB#0:
406; ATOM-NEXT: cvtsi2ssl (%rsi), %xmm0
407; ATOM-NEXT: cvtsi2ssl %edi, %xmm1
408; ATOM-NEXT: addss %xmm1, %xmm0
409; ATOM-NEXT: retq
410;
411; SLM-LABEL: test_cvtsi2ss:
412; SLM: # BB#0:
413; SLM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00]
414; SLM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:0.50]
415; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
416; SLM-NEXT: retq # sched: [4:1.00]
417;
418; SANDY-LABEL: test_cvtsi2ss:
419; SANDY: # BB#0:
420; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
421; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
422; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
423; SANDY-NEXT: retq # sched: [5:1.00]
424;
425; HASWELL-LABEL: test_cvtsi2ss:
426; HASWELL: # BB#0:
427; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
428; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
429; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
430; HASWELL-NEXT: retq # sched: [1:1.00]
431;
432; BTVER2-LABEL: test_cvtsi2ss:
433; BTVER2: # BB#0:
434; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
435; BTVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
436; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
437; BTVER2-NEXT: retq # sched: [4:1.00]
438 %1 = sitofp i32 %a0 to float
439 %2 = load i32, i32 *%a1, align 4
440 %3 = sitofp i32 %2 to float
441 %4 = fadd float %1, %3
442 ret float %4
443}
444
; Signed i64 to float conversion. The IR converts the %a0 argument and an
; i64 loaded from %a1 (align 8) with sitofp and adds the two floats. Each
; expected sequence has a register-source and a memory-source
; cvtsi2ssq/vcvtsi2ssq followed by an addss/vaddss; the order of the two
; conversions differs between the per-CPU expectations.
445define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
446; GENERIC-LABEL: test_cvtsi2ssq:
447; GENERIC: # BB#0:
448; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1
449; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0
450; GENERIC-NEXT: addss %xmm1, %xmm0
451; GENERIC-NEXT: retq
452;
453; ATOM-LABEL: test_cvtsi2ssq:
454; ATOM: # BB#0:
455; ATOM-NEXT: cvtsi2ssq (%rsi), %xmm0
456; ATOM-NEXT: cvtsi2ssq %rdi, %xmm1
457; ATOM-NEXT: addss %xmm1, %xmm0
458; ATOM-NEXT: retq
459;
460; SLM-LABEL: test_cvtsi2ssq:
461; SLM: # BB#0:
462; SLM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00]
463; SLM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50]
464; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
465; SLM-NEXT: retq # sched: [4:1.00]
466;
467; SANDY-LABEL: test_cvtsi2ssq:
468; SANDY: # BB#0:
469; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
470; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
471; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
472; SANDY-NEXT: retq # sched: [5:1.00]
473;
474; HASWELL-LABEL: test_cvtsi2ssq:
475; HASWELL: # BB#0:
476; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
477; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
478; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
479; HASWELL-NEXT: retq # sched: [1:1.00]
480;
481; BTVER2-LABEL: test_cvtsi2ssq:
482; BTVER2: # BB#0:
483; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
484; BTVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
485; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
486; BTVER2-NEXT: retq # sched: [4:1.00]
487 %1 = sitofp i64 %a0 to float
488 %2 = load i64, i64 *%a1, align 8
489 %3 = sitofp i64 %2 to float
490 %4 = fadd float %1, %3
491 ret float %4
492}
493
; Float to i32 conversion through the llvm.x86.sse.cvtss2si intrinsic.
; The %a0 argument and a float loaded from %a1 (align 4) are each placed in
; lane 0 of an otherwise undef <4 x float>, converted by the intrinsic, and
; the two i32 results are added. Each expected sequence has a
; register-source and a memory-source cvtss2si/vcvtss2si followed by addl;
; the order of the two conversions differs between the per-CPU
; expectations.
494define i32 @test_cvtss2si(float %a0, float *%a1) {
495; GENERIC-LABEL: test_cvtss2si:
496; GENERIC: # BB#0:
497; GENERIC-NEXT: cvtss2si %xmm0, %ecx
498; GENERIC-NEXT: cvtss2si (%rdi), %eax
499; GENERIC-NEXT: addl %ecx, %eax
500; GENERIC-NEXT: retq
501;
502; ATOM-LABEL: test_cvtss2si:
503; ATOM: # BB#0:
504; ATOM-NEXT: cvtss2si (%rdi), %eax
505; ATOM-NEXT: cvtss2si %xmm0, %ecx
506; ATOM-NEXT: addl %ecx, %eax
507; ATOM-NEXT: retq
508;
509; SLM-LABEL: test_cvtss2si:
510; SLM: # BB#0:
511; SLM-NEXT: cvtss2si (%rdi), %eax # sched: [7:1.00]
512; SLM-NEXT: cvtss2si %xmm0, %ecx # sched: [4:0.50]
513; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
514; SLM-NEXT: retq # sched: [4:1.00]
515;
516; SANDY-LABEL: test_cvtss2si:
517; SANDY: # BB#0:
518; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00]
519; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [7:1.00]
520; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
521; SANDY-NEXT: retq # sched: [5:1.00]
522;
523; HASWELL-LABEL: test_cvtss2si:
524; HASWELL: # BB#0:
525; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
526; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
527; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
528; HASWELL-NEXT: retq # sched: [1:1.00]
529;
530; BTVER2-LABEL: test_cvtss2si:
531; BTVER2: # BB#0:
532; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
533; BTVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00]
534; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
535; BTVER2-NEXT: retq # sched: [4:1.00]
536 %1 = insertelement <4 x float> undef, float %a0, i32 0
537 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1)
538 %3 = load float, float *%a1, align 4
539 %4 = insertelement <4 x float> undef, float %3, i32 0
540 %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4)
541 %6 = add i32 %2, %5
542 ret i32 %6
543}
544declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
545
; Float to i64 conversion through the llvm.x86.sse.cvtss2si64 intrinsic.
; The %a0 argument and a float loaded from %a1 (align 4) are each placed in
; lane 0 of an otherwise undef <4 x float>, converted by the intrinsic, and
; the two i64 results are added. Each expected sequence has a
; register-source and a memory-source cvtss2si/vcvtss2si writing a 64-bit
; register, followed by addq; the order of the two conversions differs
; between the per-CPU expectations.
546define i64 @test_cvtss2siq(float %a0, float *%a1) {
547; GENERIC-LABEL: test_cvtss2siq:
548; GENERIC: # BB#0:
549; GENERIC-NEXT: cvtss2si %xmm0, %rcx
550; GENERIC-NEXT: cvtss2si (%rdi), %rax
551; GENERIC-NEXT: addq %rcx, %rax
552; GENERIC-NEXT: retq
553;
554; ATOM-LABEL: test_cvtss2siq:
555; ATOM: # BB#0:
556; ATOM-NEXT: cvtss2si (%rdi), %rax
557; ATOM-NEXT: cvtss2si %xmm0, %rcx
558; ATOM-NEXT: addq %rcx, %rax
559; ATOM-NEXT: retq
560;
561; SLM-LABEL: test_cvtss2siq:
562; SLM: # BB#0:
563; SLM-NEXT: cvtss2si (%rdi), %rax # sched: [7:1.00]
564; SLM-NEXT: cvtss2si %xmm0, %rcx # sched: [4:0.50]
565; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
566; SLM-NEXT: retq # sched: [4:1.00]
567;
568; SANDY-LABEL: test_cvtss2siq:
569; SANDY: # BB#0:
570; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00]
571; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [7:1.00]
572; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
573; SANDY-NEXT: retq # sched: [5:1.00]
574;
575; HASWELL-LABEL: test_cvtss2siq:
576; HASWELL: # BB#0:
577; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
578; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
579; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
580; HASWELL-NEXT: retq # sched: [1:1.00]
581;
582; BTVER2-LABEL: test_cvtss2siq:
583; BTVER2: # BB#0:
584; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
585; BTVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00]
586; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
587; BTVER2-NEXT: retq # sched: [4:1.00]
588 %1 = insertelement <4 x float> undef, float %a0, i32 0
589 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1)
590 %3 = load float, float *%a1, align 4
591 %4 = insertelement <4 x float> undef, float %3, i32 0
592 %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4)
593 %6 = add i64 %2, %5
594 ret i64 %6
595}
596declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
597
; Float to i32 conversion written as plain fptosi. The IR converts the
; %a0 argument and a float loaded from %a1 (align 4) and adds the two i32
; results. Each expected sequence has a register-source and a
; memory-source cvttss2si/vcvttss2si followed by addl; the order of the two
; conversions differs between the per-CPU expectations.
598define i32 @test_cvttss2si(float %a0, float *%a1) {
599; GENERIC-LABEL: test_cvttss2si:
600; GENERIC: # BB#0:
601; GENERIC-NEXT: cvttss2si %xmm0, %ecx
602; GENERIC-NEXT: cvttss2si (%rdi), %eax
603; GENERIC-NEXT: addl %ecx, %eax
604; GENERIC-NEXT: retq
605;
606; ATOM-LABEL: test_cvttss2si:
607; ATOM: # BB#0:
608; ATOM-NEXT: cvttss2si (%rdi), %eax
609; ATOM-NEXT: cvttss2si %xmm0, %ecx
610; ATOM-NEXT: addl %ecx, %eax
611; ATOM-NEXT: retq
612;
613; SLM-LABEL: test_cvttss2si:
614; SLM: # BB#0:
615; SLM-NEXT: cvttss2si (%rdi), %eax # sched: [7:1.00]
616; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50]
617; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
618; SLM-NEXT: retq # sched: [4:1.00]
619;
620; SANDY-LABEL: test_cvttss2si:
621; SANDY: # BB#0:
622; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
623; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [7:1.00]
624; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
625; SANDY-NEXT: retq # sched: [5:1.00]
626;
627; HASWELL-LABEL: test_cvttss2si:
628; HASWELL: # BB#0:
629; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
630; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
631; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
632; HASWELL-NEXT: retq # sched: [1:1.00]
633;
634; BTVER2-LABEL: test_cvttss2si:
635; BTVER2: # BB#0:
636; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
637; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
638; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
639; BTVER2-NEXT: retq # sched: [4:1.00]
640 %1 = fptosi float %a0 to i32
641 %2 = load float, float *%a1, align 4
642 %3 = fptosi float %2 to i32
643 %4 = add i32 %1, %3
644 ret i32 %4
645}
646
; Float to i64 conversion written as plain fptosi. The IR converts the
; %a0 argument and a float loaded from %a1 (align 4) and adds the two i64
; results. Each expected sequence has a register-source and a
; memory-source cvttss2si/vcvttss2si writing a 64-bit register, followed by
; addq; the order of the two conversions differs between the per-CPU
; expectations.
647define i64 @test_cvttss2siq(float %a0, float *%a1) {
648; GENERIC-LABEL: test_cvttss2siq:
649; GENERIC: # BB#0:
650; GENERIC-NEXT: cvttss2si %xmm0, %rcx
651; GENERIC-NEXT: cvttss2si (%rdi), %rax
652; GENERIC-NEXT: addq %rcx, %rax
653; GENERIC-NEXT: retq
654;
655; ATOM-LABEL: test_cvttss2siq:
656; ATOM: # BB#0:
657; ATOM-NEXT: cvttss2si (%rdi), %rax
658; ATOM-NEXT: cvttss2si %xmm0, %rcx
659; ATOM-NEXT: addq %rcx, %rax
660; ATOM-NEXT: retq
661;
662; SLM-LABEL: test_cvttss2siq:
663; SLM: # BB#0:
664; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00]
665; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50]
666; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
667; SLM-NEXT: retq # sched: [4:1.00]
668;
669; SANDY-LABEL: test_cvttss2siq:
670; SANDY: # BB#0:
671; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
672; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [7:1.00]
673; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
674; SANDY-NEXT: retq # sched: [5:1.00]
675;
676; HASWELL-LABEL: test_cvttss2siq:
677; HASWELL: # BB#0:
678; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
679; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
680; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
681; HASWELL-NEXT: retq # sched: [1:1.00]
682;
683; BTVER2-LABEL: test_cvttss2siq:
684; BTVER2: # BB#0:
685; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
686; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
687; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
688; BTVER2-NEXT: retq # sched: [4:1.00]
689 %1 = fptosi float %a0 to i64
690 %2 = load float, float *%a1, align 4
691 %3 = fptosi float %2 to i64
692 %4 = add i64 %1, %3
693 ret i64 %4
694}
695
; Packed single-precision divide. The IR divides %a0 by %a1, then divides
; that result by a <4 x float> loaded from %a2 (align 16), so each expected
; sequence below has one register-operand divide followed by one divide
; that reads (%rdi).
696define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
697; GENERIC-LABEL: test_divps:
698; GENERIC: # BB#0:
699; GENERIC-NEXT: divps %xmm1, %xmm0
700; GENERIC-NEXT: divps (%rdi), %xmm0
701; GENERIC-NEXT: retq
702;
703; ATOM-LABEL: test_divps:
704; ATOM: # BB#0:
705; ATOM-NEXT: divps %xmm1, %xmm0
706; ATOM-NEXT: divps (%rdi), %xmm0
707; ATOM-NEXT: retq
708;
709; SLM-LABEL: test_divps:
710; SLM: # BB#0:
711; SLM-NEXT: divps %xmm1, %xmm0 # sched: [34:34.00]
712; SLM-NEXT: divps (%rdi), %xmm0 # sched: [37:34.00]
713; SLM-NEXT: retq # sched: [4:1.00]
714;
715; SANDY-LABEL: test_divps:
716; SANDY: # BB#0:
717; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
718; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
719; SANDY-NEXT: retq # sched: [5:1.00]
720;
721; HASWELL-LABEL: test_divps:
722; HASWELL: # BB#0:
723; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
724; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
725; HASWELL-NEXT: retq # sched: [1:1.00]
726;
727; BTVER2-LABEL: test_divps:
728; BTVER2: # BB#0:
729; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
730; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
731; BTVER2-NEXT: retq # sched: [4:1.00]
732 %1 = fdiv <4 x float> %a0, %a1
733 %2 = load <4 x float>, <4 x float> *%a2, align 16
734 %3 = fdiv <4 x float> %1, %2
735 ret <4 x float> %3
736}
737
; Scalar single-precision divide. The IR divides %a0 by %a1, then divides
; that result by a float loaded from %a2 (align 4), so each expected
; sequence below has one register-operand divide followed by one divide
; that reads (%rdi).
738define float @test_divss(float %a0, float %a1, float *%a2) {
739; GENERIC-LABEL: test_divss:
740; GENERIC: # BB#0:
741; GENERIC-NEXT: divss %xmm1, %xmm0
742; GENERIC-NEXT: divss (%rdi), %xmm0
743; GENERIC-NEXT: retq
744;
745; ATOM-LABEL: test_divss:
746; ATOM: # BB#0:
747; ATOM-NEXT: divss %xmm1, %xmm0
748; ATOM-NEXT: divss (%rdi), %xmm0
749; ATOM-NEXT: retq
750;
751; SLM-LABEL: test_divss:
752; SLM: # BB#0:
753; SLM-NEXT: divss %xmm1, %xmm0 # sched: [34:34.00]
754; SLM-NEXT: divss (%rdi), %xmm0 # sched: [37:34.00]
755; SLM-NEXT: retq # sched: [4:1.00]
756;
757; SANDY-LABEL: test_divss:
758; SANDY: # BB#0:
759; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
760; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
761; SANDY-NEXT: retq # sched: [5:1.00]
762;
763; HASWELL-LABEL: test_divss:
764; HASWELL: # BB#0:
765; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
766; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
767; HASWELL-NEXT: retq # sched: [1:1.00]
768;
769; BTVER2-LABEL: test_divss:
770; BTVER2: # BB#0:
771; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
772; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
773; BTVER2-NEXT: retq # sched: [4:1.00]
774 %1 = fdiv float %a0, %a1
775 %2 = load float, float *%a2, align 4
776 %3 = fdiv float %1, %2
777 ret float %3
778}
779
; Load of the MXCSR register through the llvm.x86.sse.ldmxcsr intrinsic.
; The IR stores %a0 into a 4-byte stack slot and passes the slot's address
; (bitcast to i8*) to the intrinsic. Each expected sequence is a movl of
; %edi to a negative %rsp offset followed by ldmxcsr/vldmxcsr on that slot;
; the exact offset is matched with a regex rather than a fixed number.
780define void @test_ldmxcsr(i32 %a0) {
781; GENERIC-LABEL: test_ldmxcsr:
782; GENERIC: # BB#0:
783; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
784; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
785; GENERIC-NEXT: retq
786;
787; ATOM-LABEL: test_ldmxcsr:
788; ATOM: # BB#0:
789; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
790; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
791; ATOM-NEXT: retq
792;
793; SLM-LABEL: test_ldmxcsr:
794; SLM: # BB#0:
795; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
796; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
797; SLM-NEXT: retq # sched: [4:1.00]
798;
799; SANDY-LABEL: test_ldmxcsr:
800; SANDY: # BB#0:
801; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
802; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [4:0.50]
803; SANDY-NEXT: retq # sched: [5:1.00]
804;
805; HASWELL-LABEL: test_ldmxcsr:
806; HASWELL: # BB#0:
807; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
808; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [6:1.00]
809; HASWELL-NEXT: retq # sched: [1:1.00]
810;
811; BTVER2-LABEL: test_ldmxcsr:
812; BTVER2: # BB#0:
813; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
814; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
815; BTVER2-NEXT: retq # sched: [4:1.00]
816 %1 = alloca i32, align 4
817 %2 = bitcast i32* %1 to i8*
818 store i32 %a0, i32* %1
819 call void @llvm.x86.sse.ldmxcsr(i8* %2)
820 ret void
821}
822declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone
823
; Packed single-precision maximum through the llvm.x86.sse.max.ps
; intrinsic. The intrinsic is applied to %a0 and %a1, then to that result
; and a <4 x float> loaded from %a2 (align 16), so each expected sequence
; below has one register-operand and one memory-operand maxps/vmaxps.
824define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
825; GENERIC-LABEL: test_maxps:
826; GENERIC: # BB#0:
827; GENERIC-NEXT: maxps %xmm1, %xmm0
828; GENERIC-NEXT: maxps (%rdi), %xmm0
829; GENERIC-NEXT: retq
830;
831; ATOM-LABEL: test_maxps:
832; ATOM: # BB#0:
833; ATOM-NEXT: maxps %xmm1, %xmm0
834; ATOM-NEXT: maxps (%rdi), %xmm0
835; ATOM-NEXT: retq
836;
837; SLM-LABEL: test_maxps:
838; SLM: # BB#0:
839; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
840; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00]
841; SLM-NEXT: retq # sched: [4:1.00]
842;
843; SANDY-LABEL: test_maxps:
844; SANDY: # BB#0:
845; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
846; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
847; SANDY-NEXT: retq # sched: [5:1.00]
848;
849; HASWELL-LABEL: test_maxps:
850; HASWELL: # BB#0:
851; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
852; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
853; HASWELL-NEXT: retq # sched: [1:1.00]
854;
855; BTVER2-LABEL: test_maxps:
856; BTVER2: # BB#0:
857; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
858; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
859; BTVER2-NEXT: retq # sched: [4:1.00]
860 %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
861 %2 = load <4 x float>, <4 x float> *%a2, align 16
862 %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2)
863 ret <4 x float> %3
864}
865declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
866
; Scalar single-precision maximum through the llvm.x86.sse.max.ss
; intrinsic, which takes and returns whole <4 x float> values. The
; intrinsic is applied to %a0 and %a1, then to that result and a
; <4 x float> loaded from %a2 (align 16), so each expected sequence below
; has one register-operand and one memory-operand maxss/vmaxss.
867define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
868; GENERIC-LABEL: test_maxss:
869; GENERIC: # BB#0:
870; GENERIC-NEXT: maxss %xmm1, %xmm0
871; GENERIC-NEXT: maxss (%rdi), %xmm0
872; GENERIC-NEXT: retq
873;
874; ATOM-LABEL: test_maxss:
875; ATOM: # BB#0:
876; ATOM-NEXT: maxss %xmm1, %xmm0
877; ATOM-NEXT: maxss (%rdi), %xmm0
878; ATOM-NEXT: retq
879;
880; SLM-LABEL: test_maxss:
881; SLM: # BB#0:
882; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
883; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00]
884; SLM-NEXT: retq # sched: [4:1.00]
885;
886; SANDY-LABEL: test_maxss:
887; SANDY: # BB#0:
888; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
889; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
890; SANDY-NEXT: retq # sched: [5:1.00]
891;
892; HASWELL-LABEL: test_maxss:
893; HASWELL: # BB#0:
894; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
895; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
896; HASWELL-NEXT: retq # sched: [1:1.00]
897;
898; BTVER2-LABEL: test_maxss:
899; BTVER2: # BB#0:
900; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
901; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
902; BTVER2-NEXT: retq # sched: [4:1.00]
903 %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
904 %2 = load <4 x float>, <4 x float> *%a2, align 16
905 %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
906 ret <4 x float> %3
907}
908declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
909
; test_minps: calls @llvm.x86.sse.min.ps on (%a0, %a1) and then on that result
; and a <4 x float> loaded from %a2 (align 16). Each per-CPU CHECK block expects
; one register-operand and one memory-operand (v)minps followed by retq.
910define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
911; GENERIC-LABEL: test_minps:
912; GENERIC: # BB#0:
913; GENERIC-NEXT: minps %xmm1, %xmm0
914; GENERIC-NEXT: minps (%rdi), %xmm0
915; GENERIC-NEXT: retq
916;
917; ATOM-LABEL: test_minps:
918; ATOM: # BB#0:
919; ATOM-NEXT: minps %xmm1, %xmm0
920; ATOM-NEXT: minps (%rdi), %xmm0
921; ATOM-NEXT: retq
922;
923; SLM-LABEL: test_minps:
924; SLM: # BB#0:
925; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
926; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00]
927; SLM-NEXT: retq # sched: [4:1.00]
928;
929; SANDY-LABEL: test_minps:
930; SANDY: # BB#0:
931; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
932; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
933; SANDY-NEXT: retq # sched: [5:1.00]
934;
935; HASWELL-LABEL: test_minps:
936; HASWELL: # BB#0:
937; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
938; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
939; HASWELL-NEXT: retq # sched: [1:1.00]
940;
941; BTVER2-LABEL: test_minps:
942; BTVER2: # BB#0:
943; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
944; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
945; BTVER2-NEXT: retq # sched: [4:1.00]
; Register/register call first, then a call whose second operand is the loaded vector.
946 %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
947 %2 = load <4 x float>, <4 x float> *%a2, align 16
948 %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2)
949 ret <4 x float> %3
950}
; Intrinsic declaration required by test_minps.
951declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
952
; test_minss: calls @llvm.x86.sse.min.ss on (%a0, %a1) and then on that result
; and a <4 x float> loaded from %a2. Each per-CPU CHECK block expects one
; register-operand and one memory-operand (v)minss followed by retq.
953define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
954; GENERIC-LABEL: test_minss:
955; GENERIC: # BB#0:
956; GENERIC-NEXT: minss %xmm1, %xmm0
957; GENERIC-NEXT: minss (%rdi), %xmm0
958; GENERIC-NEXT: retq
959;
960; ATOM-LABEL: test_minss:
961; ATOM: # BB#0:
962; ATOM-NEXT: minss %xmm1, %xmm0
963; ATOM-NEXT: minss (%rdi), %xmm0
964; ATOM-NEXT: retq
965;
966; SLM-LABEL: test_minss:
967; SLM: # BB#0:
968; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
969; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00]
970; SLM-NEXT: retq # sched: [4:1.00]
971;
972; SANDY-LABEL: test_minss:
973; SANDY: # BB#0:
974; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
975; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
976; SANDY-NEXT: retq # sched: [5:1.00]
977;
978; HASWELL-LABEL: test_minss:
979; HASWELL: # BB#0:
980; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
981; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
982; HASWELL-NEXT: retq # sched: [1:1.00]
983;
984; BTVER2-LABEL: test_minss:
985; BTVER2: # BB#0:
986; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
987; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
988; BTVER2-NEXT: retq # sched: [4:1.00]
; Register/register call first, then a call whose second operand is the loaded vector.
989 %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
990 %2 = load <4 x float>, <4 x float> *%a2, align 16
991 %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
992 ret <4 x float> %3
993}
; Intrinsic declaration required by test_minss.
994declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
995
; test_movaps: loads a <4 x float> from %a0 with align 16, adds it to itself,
; and stores the sum to %a1 with align 16. Each per-CPU CHECK block expects a
; (v)movaps load, a (v)addps, and a (v)movaps store.
996define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
997; GENERIC-LABEL: test_movaps:
998; GENERIC: # BB#0:
999; GENERIC-NEXT: movaps (%rdi), %xmm0
1000; GENERIC-NEXT: addps %xmm0, %xmm0
1001; GENERIC-NEXT: movaps %xmm0, (%rsi)
1002; GENERIC-NEXT: retq
1003;
1004; ATOM-LABEL: test_movaps:
1005; ATOM: # BB#0:
1006; ATOM-NEXT: movaps (%rdi), %xmm0
1007; ATOM-NEXT: addps %xmm0, %xmm0
1008; ATOM-NEXT: movaps %xmm0, (%rsi)
1009; ATOM-NEXT: retq
1010;
1011; SLM-LABEL: test_movaps:
1012; SLM: # BB#0:
1013; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00]
1014; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1015; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1016; SLM-NEXT: retq # sched: [4:1.00]
1017;
1018; SANDY-LABEL: test_movaps:
1019; SANDY: # BB#0:
1020; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50]
1021; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1022; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1023; SANDY-NEXT: retq # sched: [5:1.00]
1024;
1025; HASWELL-LABEL: test_movaps:
1026; HASWELL: # BB#0:
1027; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50]
1028; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1029; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1030; HASWELL-NEXT: retq # sched: [1:1.00]
1031;
1032; BTVER2-LABEL: test_movaps:
1033; BTVER2: # BB#0:
1034; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
1035; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1036; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1037; BTVER2-NEXT: retq # sched: [4:1.00]
; The fadd sits between the load and the store; both memory accesses use align 16.
1038 %1 = load <4 x float>, <4 x float> *%a0, align 16
1039 %2 = fadd <4 x float> %1, %1
1040 store <4 x float> %2, <4 x float> *%a1, align 16
1041 ret void
1042}
1043
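1044; TODO: the SANDY/HASWELL/BTVER2 checks below currently expect vunpckhpd rather than vmovhlps, so the AVX form of (v)movhlps is not yet covered.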
1045
; test_movhlps: a single shufflevector with mask <6, 7, 2, 3>, i.e. result lanes
; 0-1 come from lanes 2-3 of %a1 and result lanes 2-3 from lanes 2-3 of %a0.
; GENERIC/ATOM/SLM expect movhlps; SANDY/HASWELL/BTVER2 expect vunpckhpd.
; The ATOM block additionally expects six nops before retq.
1046define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
1047; GENERIC-LABEL: test_movhlps:
1048; GENERIC: # BB#0:
1049; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1050; GENERIC-NEXT: retq
1051;
1052; ATOM-LABEL: test_movhlps:
1053; ATOM: # BB#0:
1054; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1055; ATOM-NEXT: nop
1056; ATOM-NEXT: nop
1057; ATOM-NEXT: nop
1058; ATOM-NEXT: nop
1059; ATOM-NEXT: nop
1060; ATOM-NEXT: nop
1061; ATOM-NEXT: retq
1062;
1063; SLM-LABEL: test_movhlps:
1064; SLM: # BB#0:
1065; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1066; SLM-NEXT: retq # sched: [4:1.00]
1067;
1068; SANDY-LABEL: test_movhlps:
1069; SANDY: # BB#0:
1070; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1071; SANDY-NEXT: retq # sched: [5:1.00]
1072;
1073; HASWELL-LABEL: test_movhlps:
1074; HASWELL: # BB#0:
1075; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1076; HASWELL-NEXT: retq # sched: [1:1.00]
1077;
1078; BTVER2-LABEL: test_movhlps:
1079; BTVER2: # BB#0:
1080; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
1081; BTVER2-NEXT: retq # sched: [4:1.00]
; Mask indices 4-7 select from the second operand (%a1), 0-3 from the first (%a0).
1082 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1083 ret <4 x float> %1
1084}
1085
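1086; TODO: the checks below currently expect (v)movhpd for the load and movhlps+movlps or (v)pextrq for the store, so (v)movhps itself is not yet covered.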
1087
; test_movhps: loads a <2 x float> through %a2 (bitcast from x86_mmx*), places
; it in the upper two lanes of %a1, adds %a0, then stores the upper two lanes of
; the sum back through the same pointer.
; Every CPU block expects (v)movhpd for the load. The store is checked as
; movhlps + movlps on GENERIC/ATOM and as (v)pextrq $1 on SLM/SANDY/HASWELL/BTVER2.
1088define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1089; GENERIC-LABEL: test_movhps:
1090; GENERIC: # BB#0:
1091; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
1092; GENERIC-NEXT: addps %xmm0, %xmm1
1093; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
1094; GENERIC-NEXT: movlps %xmm1, (%rdi)
1095; GENERIC-NEXT: retq
1096;
1097; ATOM-LABEL: test_movhps:
1098; ATOM: # BB#0:
1099; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
1100; ATOM-NEXT: addps %xmm0, %xmm1
1101; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
1102; ATOM-NEXT: movlps %xmm1, (%rdi)
1103; ATOM-NEXT: retq
1104;
1105; SLM-LABEL: test_movhps:
1106; SLM: # BB#0:
1107; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
1108; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1109; SLM-NEXT: pextrq $1, %xmm1, (%rdi) # sched: [4:2.00]
1110; SLM-NEXT: retq # sched: [4:1.00]
1111;
1112; SANDY-LABEL: test_movhps:
1113; SANDY: # BB#0:
1114; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
1115; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1116; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
1117; SANDY-NEXT: retq # sched: [5:1.00]
1118;
1119; HASWELL-LABEL: test_movhps:
1120; HASWELL: # BB#0:
1121; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
1122; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1123; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
1124; HASWELL-NEXT: retq # sched: [1:1.00]
1125;
1126; BTVER2-LABEL: test_movhps:
1127; BTVER2: # BB#0:
1128; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1129; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1130; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
1131; BTVER2-NEXT: retq # sched: [4:1.00]
; %3 widens the loaded pair to four lanes (lanes 2-3 come from the undef operand).
; %4 keeps lanes 0-1 of %a1 and takes lanes 2-3 from the loaded pair.
; %6 extracts lanes 2-3 of the sum for the store.
1132 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1133 %2 = load <2 x float>, <2 x float> *%1, align 8
1134 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1135 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1136 %5 = fadd <4 x float> %a0, %4
1137 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1138 store <2 x float> %6, <2 x float>* %1
1139 ret void
1140}
1141
1142; TODO: the checks below currently expect (v)unpcklpd on every CPU, so (v)movlhps itself is not yet covered.
1143
; test_movlhps: shuffles lanes 0-1 of %a0 together with lanes 0-1 of %a1
; (mask <0, 1, 4, 5>) and adds %a1 to the shuffled vector.
; Every per-CPU CHECK block expects (v)unpcklpd followed by (v)addps.
1144define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
1145; GENERIC-LABEL: test_movlhps:
1146; GENERIC: # BB#0:
1147; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1148; GENERIC-NEXT: addps %xmm1, %xmm0
1149; GENERIC-NEXT: retq
1150;
1151; ATOM-LABEL: test_movlhps:
1152; ATOM: # BB#0:
1153; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1154; ATOM-NEXT: addps %xmm1, %xmm0
1155; ATOM-NEXT: retq
1156;
1157; SLM-LABEL: test_movlhps:
1158; SLM: # BB#0:
1159; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1160; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1161; SLM-NEXT: retq # sched: [4:1.00]
1162;
1163; SANDY-LABEL: test_movlhps:
1164; SANDY: # BB#0:
1165; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1166; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1167; SANDY-NEXT: retq # sched: [5:1.00]
1168;
1169; HASWELL-LABEL: test_movlhps:
1170; HASWELL: # BB#0:
1171; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1172; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1173; HASWELL-NEXT: retq # sched: [1:1.00]
1174;
1175; BTVER2-LABEL: test_movlhps:
1176; BTVER2: # BB#0:
1177; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
1178; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1179; BTVER2-NEXT: retq # sched: [4:1.00]
; Mask indices 4-5 select lanes 0-1 of the second operand (%a1).
1180 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1181 %2 = fadd <4 x float> %a1, %1
1182 ret <4 x float> %2
1183}
1184
; test_movlps: loads a <2 x float> through %a2 (bitcast from x86_mmx*), places
; it in the lower two lanes of %a1, adds %a0, then stores the lower two lanes of
; the sum back through the same pointer.
; Every CPU block expects a (v)movlpd load, a (v)addps, and a (v)movlps store.
1185define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1186; GENERIC-LABEL: test_movlps:
1187; GENERIC: # BB#0:
1188; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
1189; GENERIC-NEXT: addps %xmm0, %xmm1
1190; GENERIC-NEXT: movlps %xmm1, (%rdi)
1191; GENERIC-NEXT: retq
1192;
1193; ATOM-LABEL: test_movlps:
1194; ATOM: # BB#0:
1195; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
1196; ATOM-NEXT: addps %xmm0, %xmm1
1197; ATOM-NEXT: movlps %xmm1, (%rdi)
1198; ATOM-NEXT: retq
1199;
1200; SLM-LABEL: test_movlps:
1201; SLM: # BB#0:
1202; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
1203; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1204; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1205; SLM-NEXT: retq # sched: [4:1.00]
1206;
1207; SANDY-LABEL: test_movlps:
1208; SANDY: # BB#0:
1209; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
1210; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1211; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1212; SANDY-NEXT: retq # sched: [5:1.00]
1213;
1214; HASWELL-LABEL: test_movlps:
1215; HASWELL: # BB#0:
1216; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
1217; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1218; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1219; HASWELL-NEXT: retq # sched: [1:1.00]
1220;
1221; BTVER2-LABEL: test_movlps:
1222; BTVER2: # BB#0:
1223; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1224; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1225; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1226; BTVER2-NEXT: retq # sched: [4:1.00]
; %3 widens the loaded pair to four lanes (lanes 2-3 come from the undef operand).
; %4 takes lanes 0-1 from the loaded pair and keeps lanes 2-3 of %a1.
; %6 extracts lanes 0-1 of the sum for the store.
1227 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1228 %2 = load <2 x float>, <2 x float> *%1, align 8
1229 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1230 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1231 %5 = fadd <4 x float> %a0, %4
1232 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1233 store <2 x float> %6, <2 x float>* %1
1234 ret void
1235}
1236
; test_movmskps: returns the i32 result of @llvm.x86.sse.movmsk.ps applied to %a0.
; Every CPU block expects a single (v)movmskps into %eax; the ATOM block
; additionally expects two nops before retq.
1237define i32 @test_movmskps(<4 x float> %a0) {
1238; GENERIC-LABEL: test_movmskps:
1239; GENERIC: # BB#0:
1240; GENERIC-NEXT: movmskps %xmm0, %eax
1241; GENERIC-NEXT: retq
1242;
1243; ATOM-LABEL: test_movmskps:
1244; ATOM: # BB#0:
1245; ATOM-NEXT: movmskps %xmm0, %eax
1246; ATOM-NEXT: nop
1247; ATOM-NEXT: nop
1248; ATOM-NEXT: retq
1249;
1250; SLM-LABEL: test_movmskps:
1251; SLM: # BB#0:
1252; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50]
1253; SLM-NEXT: retq # sched: [4:1.00]
1254;
1255; SANDY-LABEL: test_movmskps:
1256; SANDY: # BB#0:
1257; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.33]
1258; SANDY-NEXT: retq # sched: [5:1.00]
1259;
1260; HASWELL-LABEL: test_movmskps:
1261; HASWELL: # BB#0:
1262; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
1263; HASWELL-NEXT: retq # sched: [1:1.00]
1264;
1265; BTVER2-LABEL: test_movmskps:
1266; BTVER2: # BB#0:
1267; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.50]
1268; BTVER2-NEXT: retq # sched: [4:1.00]
1269 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
1270 ret i32 %1
1271}
; Intrinsic declaration required by test_movmskps.
1272declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
1273
; test_movntps: stores %a0 to %a1 (align 16) with !nontemporal metadata.
; Every CPU block expects a single (v)movntps store; the ATOM block additionally
; expects six nops before retq.
; The !0 metadata node is defined outside this section of the file.
1274define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
1275; GENERIC-LABEL: test_movntps:
1276; GENERIC: # BB#0:
1277; GENERIC-NEXT: movntps %xmm0, (%rdi)
1278; GENERIC-NEXT: retq
1279;
1280; ATOM-LABEL: test_movntps:
1281; ATOM: # BB#0:
1282; ATOM-NEXT: movntps %xmm0, (%rdi)
1283; ATOM-NEXT: nop
1284; ATOM-NEXT: nop
1285; ATOM-NEXT: nop
1286; ATOM-NEXT: nop
1287; ATOM-NEXT: nop
1288; ATOM-NEXT: nop
1289; ATOM-NEXT: retq
1290;
1291; SLM-LABEL: test_movntps:
1292; SLM: # BB#0:
1293; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
1294; SLM-NEXT: retq # sched: [4:1.00]
1295;
1296; SANDY-LABEL: test_movntps:
1297; SANDY: # BB#0:
1298; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1299; SANDY-NEXT: retq # sched: [5:1.00]
1300;
1301; HASWELL-LABEL: test_movntps:
1302; HASWELL: # BB#0:
1303; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1304; HASWELL-NEXT: retq # sched: [1:1.00]
1305;
1306; BTVER2-LABEL: test_movntps:
1307; BTVER2: # BB#0:
1308; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1309; BTVER2-NEXT: retq # sched: [4:1.00]
; The !nontemporal attachment is what distinguishes this store from a plain aligned store.
1310 store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0
1311 ret void
1312}
1313
; test_movss_mem: loads a scalar float from %a0 (align 1), adds it to itself,
; and stores the sum to %a1 (align 1). Every CPU block expects a (v)movss load,
; a (v)addss, and a (v)movss store.
1314define void @test_movss_mem(float* %a0, float* %a1) {
1315; GENERIC-LABEL: test_movss_mem:
1316; GENERIC: # BB#0:
1317; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1318; GENERIC-NEXT: addss %xmm0, %xmm0
1319; GENERIC-NEXT: movss %xmm0, (%rsi)
1320; GENERIC-NEXT: retq
1321;
1322; ATOM-LABEL: test_movss_mem:
1323; ATOM: # BB#0:
1324; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1325; ATOM-NEXT: addss %xmm0, %xmm0
1326; ATOM-NEXT: movss %xmm0, (%rsi)
1327; ATOM-NEXT: retq
1328;
1329; SLM-LABEL: test_movss_mem:
1330; SLM: # BB#0:
1331; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
1332; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
1333; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
1334; SLM-NEXT: retq # sched: [4:1.00]
1335;
1336; SANDY-LABEL: test_movss_mem:
1337; SANDY: # BB#0:
1338; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
1339; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1340; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
1341; SANDY-NEXT: retq # sched: [5:1.00]
1342;
1343; HASWELL-LABEL: test_movss_mem:
1344; HASWELL: # BB#0:
1345; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
1346; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1347; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
1348; HASWELL-NEXT: retq # sched: [1:1.00]
1349;
1350; BTVER2-LABEL: test_movss_mem:
1351; BTVER2: # BB#0:
1352; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
1353; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1354; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
1355; BTVER2-NEXT: retq # sched: [4:1.00]
; The fadd sits between the scalar load and the scalar store.
1356 %1 = load float, float* %a0, align 1
1357 %2 = fadd float %1, %1
1358 store float %2, float *%a1, align 1
1359 ret void
1360}
1361
; test_movss_reg: a single shufflevector with mask <4, 1, 2, 3>, i.e. lane 0
; comes from %a1 and lanes 1-3 from %a0.
; GENERIC/ATOM expect movss, SLM expects blendps, and SANDY/HASWELL/BTVER2
; expect vblendps. The ATOM block additionally expects six nops before retq.
1362define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
1363; GENERIC-LABEL: test_movss_reg:
1364; GENERIC: # BB#0:
1365; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1366; GENERIC-NEXT: retq
1367;
1368; ATOM-LABEL: test_movss_reg:
1369; ATOM: # BB#0:
1370; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1371; ATOM-NEXT: nop
1372; ATOM-NEXT: nop
1373; ATOM-NEXT: nop
1374; ATOM-NEXT: nop
1375; ATOM-NEXT: nop
1376; ATOM-NEXT: nop
1377; ATOM-NEXT: retq
1378;
1379; SLM-LABEL: test_movss_reg:
1380; SLM: # BB#0:
1381; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
1382; SLM-NEXT: retq # sched: [4:1.00]
1383;
1384; SANDY-LABEL: test_movss_reg:
1385; SANDY: # BB#0:
1386; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
1387; SANDY-NEXT: retq # sched: [5:1.00]
1388;
1389; HASWELL-LABEL: test_movss_reg:
1390; HASWELL: # BB#0:
1391; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
1392; HASWELL-NEXT: retq # sched: [1:1.00]
1393;
1394; BTVER2-LABEL: test_movss_reg:
1395; BTVER2: # BB#0:
1396; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
1397; BTVER2-NEXT: retq # sched: [4:1.00]
; Mask index 4 selects lane 0 of the second operand (%a1).
1398 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1399 ret <4 x float> %1
1400}
1401
; test_movups: loads a <4 x float> from %a0 with align 1, adds it to itself,
; and stores the sum to %a1 with align 1. Every CPU block expects a (v)movups
; load, a (v)addps, and a (v)movups store.
1402define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
1403; GENERIC-LABEL: test_movups:
1404; GENERIC: # BB#0:
1405; GENERIC-NEXT: movups (%rdi), %xmm0
1406; GENERIC-NEXT: addps %xmm0, %xmm0
1407; GENERIC-NEXT: movups %xmm0, (%rsi)
1408; GENERIC-NEXT: retq
1409;
1410; ATOM-LABEL: test_movups:
1411; ATOM: # BB#0:
1412; ATOM-NEXT: movups (%rdi), %xmm0
1413; ATOM-NEXT: addps %xmm0, %xmm0
1414; ATOM-NEXT: movups %xmm0, (%rsi)
1415; ATOM-NEXT: retq
1416;
1417; SLM-LABEL: test_movups:
1418; SLM: # BB#0:
1419; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00]
1420; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1421; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
1422; SLM-NEXT: retq # sched: [4:1.00]
1423;
1424; SANDY-LABEL: test_movups:
1425; SANDY: # BB#0:
1426; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
1427; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1428; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
1429; SANDY-NEXT: retq # sched: [5:1.00]
1430;
1431; HASWELL-LABEL: test_movups:
1432; HASWELL: # BB#0:
1433; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
1434; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1435; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
1436; HASWELL-NEXT: retq # sched: [1:1.00]
1437;
1438; BTVER2-LABEL: test_movups:
1439; BTVER2: # BB#0:
1440; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
1441; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1442; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
1443; BTVER2-NEXT: retq # sched: [4:1.00]
; Both memory accesses use align 1 (test_movaps uses the same IR with align 16).
1444 %1 = load <4 x float>, <4 x float> *%a0, align 1
1445 %2 = fadd <4 x float> %1, %1
1446 store <4 x float> %2, <4 x float> *%a1, align 1
1447 ret void
1448}
1449
; test_mulps: multiplies %a0 by %a1, then multiplies the product by a
; <4 x float> loaded from %a2 (align 16). Each per-CPU CHECK block expects one
; register-operand and one memory-operand (v)mulps followed by retq.
1450define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1451; GENERIC-LABEL: test_mulps:
1452; GENERIC: # BB#0:
1453; GENERIC-NEXT: mulps %xmm1, %xmm0
1454; GENERIC-NEXT: mulps (%rdi), %xmm0
1455; GENERIC-NEXT: retq
1456;
1457; ATOM-LABEL: test_mulps:
1458; ATOM: # BB#0:
1459; ATOM-NEXT: mulps %xmm1, %xmm0
1460; ATOM-NEXT: mulps (%rdi), %xmm0
1461; ATOM-NEXT: retq
1462;
1463; SLM-LABEL: test_mulps:
1464; SLM: # BB#0:
1465; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00]
1466; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00]
1467; SLM-NEXT: retq # sched: [4:1.00]
1468;
1469; SANDY-LABEL: test_mulps:
1470; SANDY: # BB#0:
1471; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
1472; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1473; SANDY-NEXT: retq # sched: [5:1.00]
1474;
1475; HASWELL-LABEL: test_mulps:
1476; HASWELL: # BB#0:
1477; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
1478; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1479; HASWELL-NEXT: retq # sched: [1:1.00]
1480;
1481; BTVER2-LABEL: test_mulps:
1482; BTVER2: # BB#0:
1483; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
1484; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
1485; BTVER2-NEXT: retq # sched: [4:1.00]
; Register/register fmul first, then an fmul whose second operand is the loaded vector.
1486 %1 = fmul <4 x float> %a0, %a1
1487 %2 = load <4 x float>, <4 x float> *%a2, align 16
1488 %3 = fmul <4 x float> %1, %2
1489 ret <4 x float> %3
1490}
1491
; test_mulss: multiplies scalar %a0 by %a1, then multiplies the product by a
; float loaded from %a2 (align 4). Each per-CPU CHECK block expects one
; register-operand and one memory-operand (v)mulss followed by retq.
1492define float @test_mulss(float %a0, float %a1, float *%a2) {
1493; GENERIC-LABEL: test_mulss:
1494; GENERIC: # BB#0:
1495; GENERIC-NEXT: mulss %xmm1, %xmm0
1496; GENERIC-NEXT: mulss (%rdi), %xmm0
1497; GENERIC-NEXT: retq
1498;
1499; ATOM-LABEL: test_mulss:
1500; ATOM: # BB#0:
1501; ATOM-NEXT: mulss %xmm1, %xmm0
1502; ATOM-NEXT: mulss (%rdi), %xmm0
1503; ATOM-NEXT: retq
1504;
1505; SLM-LABEL: test_mulss:
1506; SLM: # BB#0:
1507; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00]
1508; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00]
1509; SLM-NEXT: retq # sched: [4:1.00]
1510;
1511; SANDY-LABEL: test_mulss:
1512; SANDY: # BB#0:
1513; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
1514; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1515; SANDY-NEXT: retq # sched: [5:1.00]
1516;
1517; HASWELL-LABEL: test_mulss:
1518; HASWELL: # BB#0:
1519; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
1520; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1521; HASWELL-NEXT: retq # sched: [1:1.00]
1522;
1523; BTVER2-LABEL: test_mulss:
1524; BTVER2: # BB#0:
1525; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
1526; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
1527; BTVER2-NEXT: retq # sched: [4:1.00]
; Register/register fmul first, then an fmul whose second operand is the loaded scalar.
1528 %1 = fmul float %a0, %a1
1529 %2 = load float, float *%a2, align 4
1530 %3 = fmul float %1, %2
1531 ret float %3
1532}
1533
; test_orps: ORs %a0 with %a1 and then with a <4 x float> loaded from %a2
; (align 16). The operands are bitcast to <4 x i32> for the `or` and the result
; is bitcast back to <4 x float>.
; Each per-CPU CHECK block expects one register-operand and one memory-operand
; (v)orps; the ATOM block additionally expects eight nops before retq.
1534define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1535; GENERIC-LABEL: test_orps:
1536; GENERIC: # BB#0:
1537; GENERIC-NEXT: orps %xmm1, %xmm0
1538; GENERIC-NEXT: orps (%rdi), %xmm0
1539; GENERIC-NEXT: retq
1540;
1541; ATOM-LABEL: test_orps:
1542; ATOM: # BB#0:
1543; ATOM-NEXT: orps %xmm1, %xmm0
1544; ATOM-NEXT: orps (%rdi), %xmm0
1545; ATOM-NEXT: nop
1546; ATOM-NEXT: nop
1547; ATOM-NEXT: nop
1548; ATOM-NEXT: nop
1549; ATOM-NEXT: nop
1550; ATOM-NEXT: nop
1551; ATOM-NEXT: nop
1552; ATOM-NEXT: nop
1553; ATOM-NEXT: retq
1554;
1555; SLM-LABEL: test_orps:
1556; SLM: # BB#0:
1557; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
1558; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00]
1559; SLM-NEXT: retq # sched: [4:1.00]
1560;
1561; SANDY-LABEL: test_orps:
1562; SANDY: # BB#0:
1563; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1564; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
1565; SANDY-NEXT: retq # sched: [5:1.00]
1566;
1567; HASWELL-LABEL: test_orps:
1568; HASWELL: # BB#0:
1569; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
1570; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
1571; HASWELL-NEXT: retq # sched: [1:1.00]
1572;
1573; BTVER2-LABEL: test_orps:
1574; BTVER2: # BB#0:
1575; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1576; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1577; BTVER2-NEXT: retq # sched: [4:1.00]
; `or` is an integer operation in IR, hence the float <-> i32 bitcasts around it.
1578 %1 = bitcast <4 x float> %a0 to <4 x i32>
1579 %2 = bitcast <4 x float> %a1 to <4 x i32>
1580 %3 = or <4 x i32> %1, %2
1581 %4 = load <4 x float>, <4 x float> *%a2, align 16
1582 %5 = bitcast <4 x float> %4 to <4 x i32>
1583 %6 = or <4 x i32> %3, %5
1584 %7 = bitcast <4 x i32> %6 to <4 x float>
1585 ret <4 x float> %7
1586}
1587
; test_prefetchnta: issues a single @llvm.prefetch call on %a0.
; Every CPU block expects a prefetchnta (%rdi); the ATOM block additionally
; expects six nops before retq.
1588define void @test_prefetchnta(i8* %a0) {
1589; GENERIC-LABEL: test_prefetchnta:
1590; GENERIC: # BB#0:
1591; GENERIC-NEXT: prefetchnta (%rdi)
1592; GENERIC-NEXT: retq
1593;
1594; ATOM-LABEL: test_prefetchnta:
1595; ATOM: # BB#0:
1596; ATOM-NEXT: prefetchnta (%rdi)
1597; ATOM-NEXT: nop
1598; ATOM-NEXT: nop
1599; ATOM-NEXT: nop
1600; ATOM-NEXT: nop
1601; ATOM-NEXT: nop
1602; ATOM-NEXT: nop
1603; ATOM-NEXT: retq
1604;
1605; SLM-LABEL: test_prefetchnta:
1606; SLM: # BB#0:
1607; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
1608; SLM-NEXT: retq # sched: [4:1.00]
1609;
1610; SANDY-LABEL: test_prefetchnta:
1611; SANDY: # BB#0:
1612; SANDY-NEXT: prefetchnta (%rdi) # sched: [4:0.50]
1613; SANDY-NEXT: retq # sched: [5:1.00]
1614;
1615; HASWELL-LABEL: test_prefetchnta:
1616; HASWELL: # BB#0:
1617; HASWELL-NEXT: prefetchnta (%rdi) # sched: [4:0.50]
1618; HASWELL-NEXT: retq # sched: [1:1.00]
1619;
1620; BTVER2-LABEL: test_prefetchnta:
1621; BTVER2: # BB#0:
1622; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00]
1623; BTVER2-NEXT: retq # sched: [4:1.00]
; NOTE(review): per the LangRef entry for llvm.prefetch the i32 operands are
; rw, locality and cache type, so (0, 0, 1) would be a read prefetch with no
; temporal locality into the data cache - confirm against the LangRef.
1624 call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
1625 ret void
1626}
; Intrinsic declaration required by test_prefetchnta.
1627declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone
1628
; test_rcpps: applies @llvm.x86.sse.rcp.ps to %a0 and to a <4 x float> loaded
; from %a1 (align 16), then adds the two results.
; Each per-CPU CHECK block expects one register-operand and one memory-operand
; (v)rcpps followed by (v)addps. The order of the two rcpps instructions differs
; between CPUs, and the ATOM and SLM blocks also expect a trailing movaps copy
; into %xmm0.
1629define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
1630; GENERIC-LABEL: test_rcpps:
1631; GENERIC: # BB#0:
1632; GENERIC-NEXT: rcpps %xmm0, %xmm1
1633; GENERIC-NEXT: rcpps (%rdi), %xmm0
1634; GENERIC-NEXT: addps %xmm1, %xmm0
1635; GENERIC-NEXT: retq
1636;
1637; ATOM-LABEL: test_rcpps:
1638; ATOM: # BB#0:
1639; ATOM-NEXT: rcpps (%rdi), %xmm1
1640; ATOM-NEXT: rcpps %xmm0, %xmm0
1641; ATOM-NEXT: addps %xmm0, %xmm1
1642; ATOM-NEXT: movaps %xmm1, %xmm0
1643; ATOM-NEXT: retq
1644;
1645; SLM-LABEL: test_rcpps:
1646; SLM: # BB#0:
1647; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00]
1648; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00]
1649; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1650; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
1651; SLM-NEXT: retq # sched: [4:1.00]
1652;
1653; SANDY-LABEL: test_rcpps:
1654; SANDY: # BB#0:
1655; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
1656; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00]
1657; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1658; SANDY-NEXT: retq # sched: [5:1.00]
1659;
1660; HASWELL-LABEL: test_rcpps:
1661; HASWELL: # BB#0:
1662; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
1663; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00]
1664; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1665; HASWELL-NEXT: retq # sched: [1:1.00]
1666;
1667; BTVER2-LABEL: test_rcpps:
1668; BTVER2: # BB#0:
1669; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
1670; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00]
1671; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1672; BTVER2-NEXT: retq # sched: [4:1.00]
; The fadd combines both intrinsic results so that neither call is unused.
1673 %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
1674 %2 = load <4 x float>, <4 x float> *%a1, align 16
1675 %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2)
1676 %4 = fadd <4 x float> %1, %3
1677 ret <4 x float> %4
1678}
; Intrinsic declaration required by test_rcpps.
1679declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
1680
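1681; TODO: cover the memory-operand form (rcpss_m); the checks below currently expect a separate (v)movss load feeding register-form (v)rcpss on every CPU.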
1682
; test_rcpss: applies @llvm.x86.sse.rcp.ss to %a0 and to a float loaded from
; %a1, each inserted into lane 0 of an undef <4 x float>, then adds the two
; vector results.
; Every CPU block expects a separate (v)movss load plus two register-form
; (v)rcpss instructions followed by (v)addps; no block expects a memory-operand
; rcpss.
1683define <4 x float> @test_rcpss(float %a0, float *%a1) {
1684; GENERIC-LABEL: test_rcpss:
1685; GENERIC: # BB#0:
1686; GENERIC-NEXT: rcpss %xmm0, %xmm0
1687; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1688; GENERIC-NEXT: rcpss %xmm1, %xmm1
1689; GENERIC-NEXT: addps %xmm1, %xmm0
1690; GENERIC-NEXT: retq
1691;
1692; ATOM-LABEL: test_rcpss:
1693; ATOM: # BB#0:
1694; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1695; ATOM-NEXT: rcpss %xmm0, %xmm0
1696; ATOM-NEXT: rcpss %xmm1, %xmm1
1697; ATOM-NEXT: addps %xmm1, %xmm0
1698; ATOM-NEXT: retq
1699;
1700; SLM-LABEL: test_rcpss:
1701; SLM: # BB#0:
1702; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
1703; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [8:1.00]
1704; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [8:1.00]
1705; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1706; SLM-NEXT: retq # sched: [4:1.00]
1707;
1708; SANDY-LABEL: test_rcpss:
1709; SANDY: # BB#0:
1710; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
1711; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
1712; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
1713; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1714; SANDY-NEXT: retq # sched: [5:1.00]
1715;
1716; HASWELL-LABEL: test_rcpss:
1717; HASWELL: # BB#0:
1718; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
1719; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
1720; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
1721; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1722; HASWELL-NEXT: retq # sched: [1:1.00]
1723;
1724; BTVER2-LABEL: test_rcpss:
1725; BTVER2: # BB#0:
1726; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
1727; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
1728; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
1729; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1730; BTVER2-NEXT: retq # sched: [4:1.00]
; Each scalar is placed in lane 0 of an undef vector because the intrinsic
; takes and returns <4 x float>.
1731 %1 = insertelement <4 x float> undef, float %a0, i32 0
1732 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1)
1733 %3 = load float, float *%a1, align 4
1734 %4 = insertelement <4 x float> undef, float %3, i32 0
1735 %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
1736 %6 = fadd <4 x float> %2, %5
1737 ret <4 x float> %6
1738}
; Intrinsic declaration required by test_rcpss.
1739declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
1740
; test_rsqrtps: applies @llvm.x86.sse.rsqrt.ps to %a0 and to a <4 x float>
; loaded from %a1 (align 16), then adds the two results.
; Each per-CPU CHECK block expects one register-operand and one memory-operand
; (v)rsqrtps followed by (v)addps. The order of the two rsqrtps instructions
; differs between CPUs, and the ATOM and SLM blocks also expect a trailing
; movaps copy into %xmm0.
1741define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
1742; GENERIC-LABEL: test_rsqrtps:
1743; GENERIC: # BB#0:
1744; GENERIC-NEXT: rsqrtps %xmm0, %xmm1
1745; GENERIC-NEXT: rsqrtps (%rdi), %xmm0
1746; GENERIC-NEXT: addps %xmm1, %xmm0
1747; GENERIC-NEXT: retq
1748;
1749; ATOM-LABEL: test_rsqrtps:
1750; ATOM: # BB#0:
1751; ATOM-NEXT: rsqrtps (%rdi), %xmm1
1752; ATOM-NEXT: rsqrtps %xmm0, %xmm0
1753; ATOM-NEXT: addps %xmm0, %xmm1
1754; ATOM-NEXT: movaps %xmm1, %xmm0
1755; ATOM-NEXT: retq
1756;
1757; SLM-LABEL: test_rsqrtps:
1758; SLM: # BB#0:
1759; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00]
1760; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00]
1761; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1762; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
1763; SLM-NEXT: retq # sched: [4:1.00]
1764;
1765; SANDY-LABEL: test_rsqrtps:
1766; SANDY: # BB#0:
1767; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
1768; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
1769; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1770; SANDY-NEXT: retq # sched: [5:1.00]
1771;
1772; HASWELL-LABEL: test_rsqrtps:
1773; HASWELL: # BB#0:
1774; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
1775; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
1776; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1777; HASWELL-NEXT: retq # sched: [1:1.00]
1778;
1779; BTVER2-LABEL: test_rsqrtps:
1780; BTVER2: # BB#0:
1781; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
1782; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00]
1783; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1784; BTVER2-NEXT: retq # sched: [4:1.00]
; The fadd combines both intrinsic results so that neither call is unused.
1785 %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
1786 %2 = load <4 x float>, <4 x float> *%a1, align 16
1787 %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2)
1788 %4 = fadd <4 x float> %1, %3
1789 ret <4 x float> %4
1790}
; Intrinsic declaration required by test_rsqrtps.
1791declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
1792
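1793; TODO - rsqrtss_m (memory-operand form; test_rsqrtss below loads through a separate movss/vmovss, so no rsqrtss with a memory source is exercised)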
1794
; test_rsqrtss: calls the scalar reciprocal-square-root intrinsic
; (llvm.x86.sse.rsqrt.ss) twice. Each input is a float placed in lane 0 of an
; otherwise undef <4 x float>: first the argument %a0, then a float loaded
; from %a1 (align 4). The two vector results are combined with fadd.
; In every expectation below the load appears as a separate movss/vmovss and
; both rsqrtss/vrsqrtss instructions use register operands.
1795define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
1796; GENERIC-LABEL: test_rsqrtss:
1797; GENERIC: # BB#0:
1798; GENERIC-NEXT: rsqrtss %xmm0, %xmm0
1799; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1800; GENERIC-NEXT: rsqrtss %xmm1, %xmm1
1801; GENERIC-NEXT: addps %xmm1, %xmm0
1802; GENERIC-NEXT: retq
1803;
1804; ATOM-LABEL: test_rsqrtss:
1805; ATOM: # BB#0:
1806; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1807; ATOM-NEXT: rsqrtss %xmm0, %xmm0
1808; ATOM-NEXT: rsqrtss %xmm1, %xmm1
1809; ATOM-NEXT: addps %xmm1, %xmm0
1810; ATOM-NEXT: retq
1811;
1812; SLM-LABEL: test_rsqrtss:
1813; SLM: # BB#0:
1814; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
1815; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [8:1.00]
1816; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [8:1.00]
1817; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1818; SLM-NEXT: retq # sched: [4:1.00]
1819;
1820; SANDY-LABEL: test_rsqrtss:
1821; SANDY: # BB#0:
1822; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
1823; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
1824; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
1825; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1826; SANDY-NEXT: retq # sched: [5:1.00]
1827;
1828; HASWELL-LABEL: test_rsqrtss:
1829; HASWELL: # BB#0:
1830; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
1831; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
1832; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
1833; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1834; HASWELL-NEXT: retq # sched: [1:1.00]
1835;
1836; BTVER2-LABEL: test_rsqrtss:
1837; BTVER2: # BB#0:
1838; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
1839; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
1840; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
1841; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1842; BTVER2-NEXT: retq # sched: [4:1.00]
1843 %1 = insertelement <4 x float> undef, float %a0, i32 0
1844 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1)
1845 %3 = load float, float *%a1, align 4
1846 %4 = insertelement <4 x float> undef, float %3, i32 0
1847 %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
1848 %6 = fadd <4 x float> %2, %5
1849 ret <4 x float> %6
1850}
1851declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
1852
; test_sfence: a function whose only work is a call to the
; llvm.x86.sse.sfence intrinsic before returning void. Each expectation
; consists of an sfence followed by the return; the Atom expectation also
; lists six nop instructions between them.
; NOTE(review): the declaration that follows this function is marked
; `readnone`, which looks at odds with a fence that the expectations require
; to be emitted - confirm whether the attribute is intentional or is simply
; ignored for intrinsic declarations.
1853define void @test_sfence() {
1854; GENERIC-LABEL: test_sfence:
1855; GENERIC: # BB#0:
1856; GENERIC-NEXT: sfence
1857; GENERIC-NEXT: retq
1858;
1859; ATOM-LABEL: test_sfence:
1860; ATOM: # BB#0:
1861; ATOM-NEXT: sfence
1862; ATOM-NEXT: nop
1863; ATOM-NEXT: nop
1864; ATOM-NEXT: nop
1865; ATOM-NEXT: nop
1866; ATOM-NEXT: nop
1867; ATOM-NEXT: nop
1868; ATOM-NEXT: retq
1869;
1870; SLM-LABEL: test_sfence:
1871; SLM: # BB#0:
1872; SLM-NEXT: sfence # sched: [1:1.00]
1873; SLM-NEXT: retq # sched: [4:1.00]
1874;
1875; SANDY-LABEL: test_sfence:
1876; SANDY: # BB#0:
1877; SANDY-NEXT: sfence # sched: [1:1.00]
1878; SANDY-NEXT: retq # sched: [5:1.00]
1879;
1880; HASWELL-LABEL: test_sfence:
1881; HASWELL: # BB#0:
1882; HASWELL-NEXT: sfence # sched: [1:1.00]
1883; HASWELL-NEXT: retq # sched: [1:1.00]
1884;
1885; BTVER2-LABEL: test_sfence:
1886; BTVER2: # BB#0:
1887; BTVER2-NEXT: sfence # sched: [1:1.00]
1888; BTVER2-NEXT: retq # sched: [4:1.00]
1889 call void @llvm.x86.sse.sfence()
1890 ret void
1891}
1892declare void @llvm.x86.sse.sfence() nounwind readnone
1893
; test_shufps: two chained shufflevector operations on <4 x float>.
; The first selects lanes <0,0,4,4> from (%a0, %a1); the second selects lanes
; <0,3,4,4> from (that result, a vector loaded from %a2 with align 16).
; The expectations show one register-source and one memory-source
; shufps/vshufps. The Atom expectation additionally lists four nop
; instructions before the return.
1894define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
1895; GENERIC-LABEL: test_shufps:
1896; GENERIC: # BB#0:
1897; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
1898; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0]
1899; GENERIC-NEXT: retq
1900;
1901; ATOM-LABEL: test_shufps:
1902; ATOM: # BB#0:
1903; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
1904; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0]
1905; ATOM-NEXT: nop
1906; ATOM-NEXT: nop
1907; ATOM-NEXT: nop
1908; ATOM-NEXT: nop
1909; ATOM-NEXT: retq
1910;
1911; SLM-LABEL: test_shufps:
1912; SLM: # BB#0:
1913; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
1914; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [4:1.00]
1915; SLM-NEXT: retq # sched: [4:1.00]
1916;
1917; SANDY-LABEL: test_shufps:
1918; SANDY: # BB#0:
1919; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
1920; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
1921; SANDY-NEXT: retq # sched: [5:1.00]
1922;
1923; HASWELL-LABEL: test_shufps:
1924; HASWELL: # BB#0:
1925; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
1926; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
1927; HASWELL-NEXT: retq # sched: [1:1.00]
1928;
1929; BTVER2-LABEL: test_shufps:
1930; BTVER2: # BB#0:
1931; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
1932; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
1933; BTVER2-NEXT: retq # sched: [4:1.00]
1934 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
1935 %2 = load <4 x float>, <4 x float> *%a2, align 16
1936 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
1937 ret <4 x float> %3
1938}
1939
; test_sqrtps: calls the packed square-root intrinsic (llvm.x86.sse.sqrt.ps)
; on the vector argument %a0 and on a vector loaded from %a1 (align 16), then
; returns the fadd of both results. The expectations list one register-source
; and one (%rdi) memory-source sqrtps/vsqrtps followed by the add.
1940define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
1941; GENERIC-LABEL: test_sqrtps:
1942; GENERIC: # BB#0:
1943; GENERIC-NEXT: sqrtps %xmm0, %xmm1
1944; GENERIC-NEXT: sqrtps (%rdi), %xmm0
1945; GENERIC-NEXT: addps %xmm1, %xmm0
1946; GENERIC-NEXT: retq
1947;
1948; ATOM-LABEL: test_sqrtps:
1949; ATOM: # BB#0:
1950; ATOM-NEXT: sqrtps %xmm0, %xmm1
1951; ATOM-NEXT: sqrtps (%rdi), %xmm0
1952; ATOM-NEXT: addps %xmm1, %xmm0
1953; ATOM-NEXT: retq
1954;
1955; SLM-LABEL: test_sqrtps:
1956; SLM: # BB#0:
1957; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [18:1.00]
1958; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [15:1.00]
1959; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1960; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
1961; SLM-NEXT: retq # sched: [4:1.00]
1962;
1963; SANDY-LABEL: test_sqrtps:
1964; SANDY: # BB#0:
1965; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
1966; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
1967; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1968; SANDY-NEXT: retq # sched: [5:1.00]
1969;
1970; HASWELL-LABEL: test_sqrtps:
1971; HASWELL: # BB#0:
1972; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
1973; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
1974; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1975; HASWELL-NEXT: retq # sched: [1:1.00]
1976;
1977; BTVER2-LABEL: test_sqrtps:
1978; BTVER2: # BB#0:
1979; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
1980; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00]
1981; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1982; BTVER2-NEXT: retq # sched: [4:1.00]
1983 %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
1984 %2 = load <4 x float>, <4 x float> *%a1, align 16
1985 %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2)
1986 %4 = fadd <4 x float> %1, %3
1987 ret <4 x float> %4
1988}
1989declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
1990
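1991; TODO - sqrtss_m (memory-operand form; test_sqrtss below loads through a separate movaps/vmovaps, so no sqrtss with a memory source is exercised)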
1992
; test_sqrtss: calls the scalar square-root intrinsic (llvm.x86.sse.sqrt.ss)
; on the vector argument %a0 and on a full <4 x float> loaded from %a1
; (align 16), then returns the fadd of both results. In every expectation
; the load is a separate movaps/vmovaps and both sqrtss/vsqrtss instructions
; take register operands.
1993define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
1994; GENERIC-LABEL: test_sqrtss:
1995; GENERIC: # BB#0:
1996; GENERIC-NEXT: sqrtss %xmm0, %xmm0
1997; GENERIC-NEXT: movaps (%rdi), %xmm1
1998; GENERIC-NEXT: sqrtss %xmm1, %xmm1
1999; GENERIC-NEXT: addps %xmm1, %xmm0
2000; GENERIC-NEXT: retq
2001;
2002; ATOM-LABEL: test_sqrtss:
2003; ATOM: # BB#0:
2004; ATOM-NEXT: movaps (%rdi), %xmm1
2005; ATOM-NEXT: sqrtss %xmm0, %xmm0
2006; ATOM-NEXT: sqrtss %xmm1, %xmm1
2007; ATOM-NEXT: addps %xmm1, %xmm0
2008; ATOM-NEXT: retq
2009;
2010; SLM-LABEL: test_sqrtss:
2011; SLM: # BB#0:
2012; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00]
2013; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [18:1.00]
2014; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [18:1.00]
2015; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2016; SLM-NEXT: retq # sched: [4:1.00]
2017;
2018; SANDY-LABEL: test_sqrtss:
2019; SANDY: # BB#0:
2020; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
2021; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50]
2022; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
2023; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2024; SANDY-NEXT: retq # sched: [5:1.00]
2025;
2026; HASWELL-LABEL: test_sqrtss:
2027; HASWELL: # BB#0:
2028; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
2029; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50]
2030; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
2031; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2032; HASWELL-NEXT: retq # sched: [1:1.00]
2033;
2034; BTVER2-LABEL: test_sqrtss:
2035; BTVER2: # BB#0:
2036; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
2037; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [26:21.00]
2038; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [26:21.00]
2039; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2040; BTVER2-NEXT: retq # sched: [4:1.00]
2041 %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
2042 %2 = load <4 x float>, <4 x float> *%a1, align 16
2043 %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2)
2044 %4 = fadd <4 x float> %1, %3
2045 ret <4 x float> %4
2046}
2047declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
2048
; test_stmxcsr: allocates an i32 stack slot, passes its address (bitcast to
; i8*) to the llvm.x86.sse.stmxcsr intrinsic, then loads the slot and returns
; it. Each expectation is a stmxcsr/vstmxcsr to a negative %rsp offset
; followed by a movl from a negative %rsp offset into %eax.
; NOTE(review): the declaration that follows this function is marked
; `readnone` even though the function reads back what the call stored through
; the pointer - confirm whether the attribute is intentional or is simply
; ignored for intrinsic declarations.
2049define i32 @test_stmxcsr() {
2050; GENERIC-LABEL: test_stmxcsr:
2051; GENERIC: # BB#0:
2052; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
2053; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax
2054; GENERIC-NEXT: retq
2055;
2056; ATOM-LABEL: test_stmxcsr:
2057; ATOM: # BB#0:
2058; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
2059; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax
2060; ATOM-NEXT: retq
2061;
2062; SLM-LABEL: test_stmxcsr:
2063; SLM: # BB#0:
2064; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
2065; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
2066; SLM-NEXT: retq # sched: [4:1.00]
2067;
2068; SANDY-LABEL: test_stmxcsr:
2069; SANDY: # BB#0:
2070; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
2071; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
2072; SANDY-NEXT: retq # sched: [5:1.00]
2073;
2074; HASWELL-LABEL: test_stmxcsr:
2075; HASWELL: # BB#0:
2076; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
2077; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
2078; HASWELL-NEXT: retq # sched: [1:1.00]
2079;
2080; BTVER2-LABEL: test_stmxcsr:
2081; BTVER2: # BB#0:
2082; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
2083; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
2084; BTVER2-NEXT: retq # sched: [4:1.00]
2085 %1 = alloca i32, align 4
2086 %2 = bitcast i32* %1 to i8*
2087 call void @llvm.x86.sse.stmxcsr(i8* %2)
2088 %3 = load i32, i32* %1, align 4
2089 ret i32 %3
2090}
2091declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone
2092
; test_subps: packed single-precision subtraction. Computes %a0 - %a1, then
; subtracts a vector loaded from %a2 (align 16) from that result. The
; expectations list one register-source and one (%rdi) memory-source
; subps/vsubps.
2093define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2094; GENERIC-LABEL: test_subps:
2095; GENERIC: # BB#0:
2096; GENERIC-NEXT: subps %xmm1, %xmm0
2097; GENERIC-NEXT: subps (%rdi), %xmm0
2098; GENERIC-NEXT: retq
2099;
2100; ATOM-LABEL: test_subps:
2101; ATOM: # BB#0:
2102; ATOM-NEXT: subps %xmm1, %xmm0
2103; ATOM-NEXT: subps (%rdi), %xmm0
2104; ATOM-NEXT: retq
2105;
2106; SLM-LABEL: test_subps:
2107; SLM: # BB#0:
2108; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
2109; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00]
2110; SLM-NEXT: retq # sched: [4:1.00]
2111;
2112; SANDY-LABEL: test_subps:
2113; SANDY: # BB#0:
2114; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2115; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2116; SANDY-NEXT: retq # sched: [5:1.00]
2117;
2118; HASWELL-LABEL: test_subps:
2119; HASWELL: # BB#0:
2120; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2121; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2122; HASWELL-NEXT: retq # sched: [1:1.00]
2123;
2124; BTVER2-LABEL: test_subps:
2125; BTVER2: # BB#0:
2126; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2127; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
2128; BTVER2-NEXT: retq # sched: [4:1.00]
2129 %1 = fsub <4 x float> %a0, %a1
2130 %2 = load <4 x float>, <4 x float> *%a2, align 16
2131 %3 = fsub <4 x float> %1, %2
2132 ret <4 x float> %3
2133}
2134
; test_subss: scalar single-precision subtraction. Computes %a0 - %a1, then
; subtracts a float loaded from %a2 (align 4) from that result. The
; expectations list one register-source and one (%rdi) memory-source
; subss/vsubss.
2135define float @test_subss(float %a0, float %a1, float *%a2) {
2136; GENERIC-LABEL: test_subss:
2137; GENERIC: # BB#0:
2138; GENERIC-NEXT: subss %xmm1, %xmm0
2139; GENERIC-NEXT: subss (%rdi), %xmm0
2140; GENERIC-NEXT: retq
2141;
2142; ATOM-LABEL: test_subss:
2143; ATOM: # BB#0:
2144; ATOM-NEXT: subss %xmm1, %xmm0
2145; ATOM-NEXT: subss (%rdi), %xmm0
2146; ATOM-NEXT: retq
2147;
2148; SLM-LABEL: test_subss:
2149; SLM: # BB#0:
2150; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
2151; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00]
2152; SLM-NEXT: retq # sched: [4:1.00]
2153;
2154; SANDY-LABEL: test_subss:
2155; SANDY: # BB#0:
2156; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2157; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2158; SANDY-NEXT: retq # sched: [5:1.00]
2159;
2160; HASWELL-LABEL: test_subss:
2161; HASWELL: # BB#0:
2162; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2163; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2164; HASWELL-NEXT: retq # sched: [1:1.00]
2165;
2166; BTVER2-LABEL: test_subss:
2167; BTVER2: # BB#0:
2168; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2169; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
2170; BTVER2-NEXT: retq # sched: [4:1.00]
2171 %1 = fsub float %a0, %a1
2172 %2 = load float, float *%a2, align 4
2173 %3 = fsub float %1, %2
2174 ret float %3
2175}
2176
; test_ucomiss: calls the llvm.x86.sse.ucomieq.ss intrinsic twice - on
; (%a0, %a1) and on (%a0, a vector loaded from %a2) - and returns the bitwise
; or of the two i32 results. Each expectation contains a register-source and
; a (%rdi) memory-source ucomiss/vucomiss, each followed by a setnp/sete/andb
; sequence, with the two partial results merged by orb and widened by movzbl.
; NOTE(review): the vector load here uses align 4, whereas the other
; <4 x float> loads in the neighbouring tests use align 16 - confirm that the
; weaker alignment is intentional.
2177define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2178; GENERIC-LABEL: test_ucomiss:
2179; GENERIC: # BB#0:
2180; GENERIC-NEXT: ucomiss %xmm1, %xmm0
2181; GENERIC-NEXT: setnp %al
2182; GENERIC-NEXT: sete %cl
2183; GENERIC-NEXT: andb %al, %cl
2184; GENERIC-NEXT: ucomiss (%rdi), %xmm0
2185; GENERIC-NEXT: setnp %al
2186; GENERIC-NEXT: sete %dl
2187; GENERIC-NEXT: andb %al, %dl
2188; GENERIC-NEXT: orb %cl, %dl
2189; GENERIC-NEXT: movzbl %dl, %eax
2190; GENERIC-NEXT: retq
2191;
2192; ATOM-LABEL: test_ucomiss:
2193; ATOM: # BB#0:
2194; ATOM-NEXT: ucomiss %xmm1, %xmm0
2195; ATOM-NEXT: setnp %al
2196; ATOM-NEXT: sete %cl
2197; ATOM-NEXT: andb %al, %cl
2198; ATOM-NEXT: ucomiss (%rdi), %xmm0
2199; ATOM-NEXT: setnp %al
2200; ATOM-NEXT: sete %dl
2201; ATOM-NEXT: andb %al, %dl
2202; ATOM-NEXT: orb %cl, %dl
2203; ATOM-NEXT: movzbl %dl, %eax
2204; ATOM-NEXT: retq
2205;
2206; SLM-LABEL: test_ucomiss:
2207; SLM: # BB#0:
2208; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
2209; SLM-NEXT: setnp %al # sched: [1:0.50]
2210; SLM-NEXT: sete %cl # sched: [1:0.50]
2211; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
2212; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00]
2213; SLM-NEXT: setnp %al # sched: [1:0.50]
2214; SLM-NEXT: sete %dl # sched: [1:0.50]
2215; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
2216; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
2217; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
2218; SLM-NEXT: retq # sched: [4:1.00]
2219;
2220; SANDY-LABEL: test_ucomiss:
2221; SANDY: # BB#0:
2222; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
2223; SANDY-NEXT: setnp %al # sched: [1:0.33]
2224; SANDY-NEXT: sete %cl # sched: [1:0.33]
2225; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
2226; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
2227; SANDY-NEXT: setnp %al # sched: [1:0.33]
2228; SANDY-NEXT: sete %dl # sched: [1:0.33]
2229; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
2230; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
2231; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
2232; SANDY-NEXT: retq # sched: [5:1.00]
2233;
2234; HASWELL-LABEL: test_ucomiss:
2235; HASWELL: # BB#0:
2236; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
2237; HASWELL-NEXT: setnp %al # sched: [1:0.50]
2238; HASWELL-NEXT: sete %cl # sched: [1:0.50]
2239; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
2240; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
2241; HASWELL-NEXT: setnp %al # sched: [1:0.50]
2242; HASWELL-NEXT: sete %dl # sched: [1:0.50]
2243; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
2244; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
2245; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
2246; HASWELL-NEXT: retq # sched: [1:1.00]
2247;
2248; BTVER2-LABEL: test_ucomiss:
2249; BTVER2: # BB#0:
2250; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
2251; BTVER2-NEXT: setnp %al # sched: [1:0.50]
2252; BTVER2-NEXT: sete %cl # sched: [1:0.50]
2253; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
2254; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
2255; BTVER2-NEXT: setnp %al # sched: [1:0.50]
2256; BTVER2-NEXT: sete %dl # sched: [1:0.50]
2257; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
2258; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
2259; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
2260; BTVER2-NEXT: retq # sched: [4:1.00]
2261 %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
2262 %2 = load <4 x float>, <4 x float> *%a2, align 4
2263 %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
2264 %4 = or i32 %1, %3
2265 ret i32 %4
2266}
2267declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
2268
; test_unpckhps: two chained shufflevector operations using the interleave
; mask <2,6,3,7>: first on (%a0, %a1), then on (that result, a vector loaded
; from %a2 with align 16). The expectations show one register-source and one
; memory-source unpckhps/vunpckhps. The Atom expectation additionally lists
; four nop instructions before the return.
2269define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2270; GENERIC-LABEL: test_unpckhps:
2271; GENERIC: # BB#0:
2272; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2273; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
2274; GENERIC-NEXT: retq
2275;
2276; ATOM-LABEL: test_unpckhps:
2277; ATOM: # BB#0:
2278; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2279; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
2280; ATOM-NEXT: nop
2281; ATOM-NEXT: nop
2282; ATOM-NEXT: nop
2283; ATOM-NEXT: nop
2284; ATOM-NEXT: retq
2285;
2286; SLM-LABEL: test_unpckhps:
2287; SLM: # BB#0:
2288; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
2289; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
2290; SLM-NEXT: retq # sched: [4:1.00]
2291;
2292; SANDY-LABEL: test_unpckhps:
2293; SANDY: # BB#0:
2294; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
2295; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
2296; SANDY-NEXT: retq # sched: [5:1.00]
2297;
2298; HASWELL-LABEL: test_unpckhps:
2299; HASWELL: # BB#0:
2300; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
2301; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
2302; HASWELL-NEXT: retq # sched: [1:1.00]
2303;
2304; BTVER2-LABEL: test_unpckhps:
2305; BTVER2: # BB#0:
2306; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
2307; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
2308; BTVER2-NEXT: retq # sched: [4:1.00]
2309 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
2310 %2 = load <4 x float>, <4 x float> *%a2, align 16
2311 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
2312 ret <4 x float> %3
2313}
2314
; test_unpcklps: two chained shufflevector operations using the interleave
; mask <0,4,1,5>: first on (%a0, %a1), then on (that result, a vector loaded
; from %a2 with align 16). The expectations show one register-source and one
; memory-source unpcklps/vunpcklps. The Atom expectation additionally lists
; four nop instructions before the return.
2315define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2316; GENERIC-LABEL: test_unpcklps:
2317; GENERIC: # BB#0:
2318; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2319; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
2320; GENERIC-NEXT: retq
2321;
2322; ATOM-LABEL: test_unpcklps:
2323; ATOM: # BB#0:
2324; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2325; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
2326; ATOM-NEXT: nop
2327; ATOM-NEXT: nop
2328; ATOM-NEXT: nop
2329; ATOM-NEXT: nop
2330; ATOM-NEXT: retq
2331;
2332; SLM-LABEL: test_unpcklps:
2333; SLM: # BB#0:
2334; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
2335; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [4:1.00]
2336; SLM-NEXT: retq # sched: [4:1.00]
2337;
2338; SANDY-LABEL: test_unpcklps:
2339; SANDY: # BB#0:
2340; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
2341; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
2342; SANDY-NEXT: retq # sched: [5:1.00]
2343;
2344; HASWELL-LABEL: test_unpcklps:
2345; HASWELL: # BB#0:
2346; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
2347; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
2348; HASWELL-NEXT: retq # sched: [1:1.00]
2349;
2350; BTVER2-LABEL: test_unpcklps:
2351; BTVER2: # BB#0:
2352; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
2353; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
2354; BTVER2-NEXT: retq # sched: [4:1.00]
2355 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
2356 %2 = load <4 x float>, <4 x float> *%a2, align 16
2357 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
2358 ret <4 x float> %3
2359}
2360
; test_xorps: bitwise xor on float vectors, expressed through integer
; bitcasts. %a0 and %a1 are bitcast to <4 x i32> and xor-ed; a vector loaded
; from %a2 (align 16) is bitcast and xor-ed into that result, which is then
; bitcast back to <4 x float>. The expectations show one register-source and
; one (%rdi) memory-source xorps/vxorps. The Atom expectation additionally
; lists eight nop instructions before the return.
2361define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2362; GENERIC-LABEL: test_xorps:
2363; GENERIC: # BB#0:
2364; GENERIC-NEXT: xorps %xmm1, %xmm0
2365; GENERIC-NEXT: xorps (%rdi), %xmm0
2366; GENERIC-NEXT: retq
2367;
2368; ATOM-LABEL: test_xorps:
2369; ATOM: # BB#0:
2370; ATOM-NEXT: xorps %xmm1, %xmm0
2371; ATOM-NEXT: xorps (%rdi), %xmm0
2372; ATOM-NEXT: nop
2373; ATOM-NEXT: nop
2374; ATOM-NEXT: nop
2375; ATOM-NEXT: nop
2376; ATOM-NEXT: nop
2377; ATOM-NEXT: nop
2378; ATOM-NEXT: nop
2379; ATOM-NEXT: nop
2380; ATOM-NEXT: retq
2381;
2382; SLM-LABEL: test_xorps:
2383; SLM: # BB#0:
2384; SLM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
2385; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00]
2386; SLM-NEXT: retq # sched: [4:1.00]
2387;
2388; SANDY-LABEL: test_xorps:
2389; SANDY: # BB#0:
2390; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2391; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
2392; SANDY-NEXT: retq # sched: [5:1.00]
2393;
2394; HASWELL-LABEL: test_xorps:
2395; HASWELL: # BB#0:
2396; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
2397; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
2398; HASWELL-NEXT: retq # sched: [1:1.00]
2399;
2400; BTVER2-LABEL: test_xorps:
2401; BTVER2: # BB#0:
2402; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2403; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
2404; BTVER2-NEXT: retq # sched: [4:1.00]
2405 %1 = bitcast <4 x float> %a0 to <4 x i32>
2406 %2 = bitcast <4 x float> %a1 to <4 x i32>
2407 %3 = xor <4 x i32> %1, %2
2408 %4 = load <4 x float>, <4 x float> *%a2, align 16
2409 %5 = bitcast <4 x float> %4 to <4 x i32>
2410 %6 = xor <4 x i32> %3, %5
2411 %7 = bitcast <4 x i32> %6 to <4 x float>
2412 ret <4 x float> %7
2413}
2414
2415!0 = !{i32 1}