; Source blob: 4164cb16bbdbff7173abf2aa9c163c4cbd5c76a6
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

; Packed single-precision add. The first fadd uses two register arguments,
; the second takes its operand from a 16-byte-aligned load of %a2, so both
; the register and the memory-operand forms appear in the expected output.
define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_addps:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    addps %xmm1, %xmm0
; GENERIC-NEXT:    addps (%rdi), %xmm0
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_addps:
; ATOM:       # BB#0:
; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    addps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_addps:
; SLM:       # BB#0:
; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    addps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_addps:
; SANDY:       # BB#0:
; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_addps:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_addps:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_addps:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = fadd <4 x float> %a0, %a1
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = fadd <4 x float> %1, %2
  ret <4 x float> %3
}

; Scalar single-precision add. One fadd of the two float arguments, then an
; fadd with a float loaded from %a2 (4-byte aligned), covering the register
; and memory-operand forms.
define float @test_addss(float %a0, float %a1, float *%a2) {
; GENERIC-LABEL: test_addss:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    addss %xmm1, %xmm0
; GENERIC-NEXT:    addss (%rdi), %xmm0
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_addss:
; ATOM:       # BB#0:
; ATOM-NEXT:    addss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    addss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_addss:
; SLM:       # BB#0:
; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    addss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_addss:
; SANDY:       # BB#0:
; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_addss:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_addss:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_addss:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = fadd float %a0, %a1
  %2 = load float, float *%a2, align 4
  %3 = fadd float %1, %2
  ret float %3
}

; Packed bitwise AND on float vectors. The operands are bitcast to <4 x i32>,
; ANDed register-with-register, then ANDed with a vector loaded from %a2, and
; the result is bitcast back to <4 x float>.
define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_andps:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    andps %xmm1, %xmm0
; GENERIC-NEXT:    andps (%rdi), %xmm0
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_andps:
; ATOM:       # BB#0:
; ATOM-NEXT:    andps %xmm1, %xmm0
; ATOM-NEXT:    andps (%rdi), %xmm0
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_andps:
; SLM:       # BB#0:
; SLM-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    andps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_andps:
; SANDY:       # BB#0:
; SANDY-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_andps:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_andps:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_andps:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = bitcast <4 x float> %a0 to <4 x i32>
  %2 = bitcast <4 x float> %a1 to <4 x i32>
  %3 = and <4 x i32> %1, %2
  %4 = load <4 x float>, <4 x float> *%a2, align 16
  %5 = bitcast <4 x float> %4 to <4 x i32>
  %6 = and <4 x i32> %3, %5
  %7 = bitcast <4 x i32> %6 to <4 x float>
  ret <4 x float> %7
}

; Packed bitwise AND-NOT on float vectors. Each step inverts one operand with
; an xor against all-ones and ANDs it with the other, first on the two
; register arguments, then against a vector loaded from %a2.
define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_andnotps:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    andnps %xmm1, %xmm0
; GENERIC-NEXT:    andnps (%rdi), %xmm0
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_andnotps:
; ATOM:       # BB#0:
; ATOM-NEXT:    andnps %xmm1, %xmm0
; ATOM-NEXT:    andnps (%rdi), %xmm0
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_andnotps:
; SLM:       # BB#0:
; SLM-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    andnps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_andnotps:
; SANDY:       # BB#0:
; SANDY-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_andnotps:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_andnotps:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_andnotps:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = bitcast <4 x float> %a0 to <4 x i32>
  %2 = bitcast <4 x float> %a1 to <4 x i32>
  %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
  %4 = and <4 x i32> %3, %2
  %5 = load <4 x float>, <4 x float> *%a2, align 16
  %6 = bitcast <4 x float> %5 to <4 x i32>
  %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
  %8 = and <4 x i32> %6, %7
  %9 = bitcast <4 x i32> %8 to <4 x float>
  ret <4 x float> %9
}

; Packed single-precision ordered-equal compare. %a0 is compared against %a1
; and against a vector loaded from %a2; the two <4 x i1> masks are ORed,
; sign-extended to <4 x i32> and bitcast to <4 x float>.
define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_cmpps:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    cmpeqps %xmm0, %xmm1
; GENERIC-NEXT:    cmpeqps (%rdi), %xmm0
; GENERIC-NEXT:    orps %xmm1, %xmm0
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_cmpps:
; ATOM:       # BB#0:
; ATOM-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [5:5.00]
; ATOM-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    orps %xmm1, %xmm0
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmpps:
; SLM:       # BB#0:
; SLM-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cmpps:
; SANDY:       # BB#0:
; SANDY-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cmpps:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cmpps:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cmpps:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; ZNVER1-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = fcmp oeq <4 x float> %a0, %a1
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = fcmp oeq <4 x float> %a0, %2
  %4 = or <4 x i1> %1, %3
  %5 = sext <4 x i1> %4 to <4 x i32>
  %6 = bitcast <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

; Scalar single-precision compare via the llvm.x86.sse.cmp.ss intrinsic with
; immediate 0. The first call compares the two float arguments (inserted into
; lane 0 of undef vectors); the second compares that result with a float
; loaded from %a2. Lane 0 of the final vector is returned.
define float @test_cmpss(float %a0, float %a1, float *%a2) {
; GENERIC-LABEL: test_cmpss:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    cmpeqss %xmm1, %xmm0
; GENERIC-NEXT:    cmpeqss (%rdi), %xmm0
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_cmpss:
; ATOM:       # BB#0:
; ATOM-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmpss:
; SLM:       # BB#0:
; SLM-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cmpss:
; SANDY:       # BB#0:
; SANDY-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cmpss:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cmpss:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cmpss:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = insertelement <4 x float> undef, float %a0, i32 0
  %2 = insertelement <4 x float> undef, float %a1, i32 0
  %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0)
  %4 = load float, float *%a2, align 4
  %5 = insertelement <4 x float> undef, float %4, i32 0
  %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0)
  %7 = extractelement <4 x float> %6, i32 0
  ret float %7
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; Scalar ordered compare setting EFLAGS, via llvm.x86.sse.comieq.ss. %a0 is
; compared with %a1 and with a vector loaded from %a2 (note the load is only
; 4-byte aligned); the two i32 results are ORed and returned.
define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_comiss:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    comiss %xmm1, %xmm0
; GENERIC-NEXT:    setnp %al
; GENERIC-NEXT:    sete %cl
; GENERIC-NEXT:    andb %al, %cl
; GENERIC-NEXT:    comiss (%rdi), %xmm0
; GENERIC-NEXT:    setnp %al
; GENERIC-NEXT:    sete %dl
; GENERIC-NEXT:    andb %al, %dl
; GENERIC-NEXT:    orb %cl, %dl
; GENERIC-NEXT:    movzbl %dl, %eax
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_comiss:
; ATOM:       # BB#0:
; ATOM-NEXT:    comiss %xmm1, %xmm0 # sched: [9:4.50]
; ATOM-NEXT:    setnp %al # sched: [1:0.50]
; ATOM-NEXT:    sete %cl # sched: [1:0.50]
; ATOM-NEXT:    andb %al, %cl # sched: [1:0.50]
; ATOM-NEXT:    comiss (%rdi), %xmm0 # sched: [10:5.00]
; ATOM-NEXT:    setnp %al # sched: [1:0.50]
; ATOM-NEXT:    sete %dl # sched: [1:0.50]
; ATOM-NEXT:    andb %al, %dl # sched: [1:0.50]
; ATOM-NEXT:    orb %cl, %dl # sched: [1:0.50]
; ATOM-NEXT:    movzbl %dl, %eax # sched: [1:1.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_comiss:
; SLM:       # BB#0:
; SLM-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    setnp %al # sched: [1:0.50]
; SLM-NEXT:    sete %cl # sched: [1:0.50]
; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
; SLM-NEXT:    comiss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    setnp %al # sched: [1:0.50]
; SLM-NEXT:    sete %dl # sched: [1:0.50]
; SLM-NEXT:    andb %al, %dl # sched: [1:0.50]
; SLM-NEXT:    orb %cl, %dl # sched: [1:0.50]
; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_comiss:
; SANDY:       # BB#0:
; SANDY-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    setnp %al # sched: [1:1.00]
; SANDY-NEXT:    sete %cl # sched: [1:1.00]
; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT:    vcomiss (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-NEXT:    setnp %al # sched: [1:1.00]
; SANDY-NEXT:    sete %dl # sched: [1:1.00]
; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_comiss:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
; HASWELL-NEXT:    sete %cl # sched: [1:0.50]
; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
; HASWELL-NEXT:    vcomiss (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
; HASWELL-NEXT:    sete %dl # sched: [1:0.50]
; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_comiss:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
; BTVER2-NEXT:    sete %cl # sched: [1:0.50]
; BTVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
; BTVER2-NEXT:    vcomiss (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
; BTVER2-NEXT:    sete %dl # sched: [1:0.50]
; BTVER2-NEXT:    andb %al, %dl # sched: [1:0.50]
; BTVER2-NEXT:    orb %cl, %dl # sched: [1:0.50]
; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_comiss:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
; ZNVER1-NEXT:    sete %cl # sched: [1:0.25]
; ZNVER1-NEXT:    andb %al, %cl # sched: [1:0.25]
; ZNVER1-NEXT:    vcomiss (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
; ZNVER1-NEXT:    sete %dl # sched: [1:0.25]
; ZNVER1-NEXT:    andb %al, %dl # sched: [1:0.25]
; ZNVER1-NEXT:    orb %cl, %dl # sched: [1:0.25]
; ZNVER1-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 4
  %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2)
  %4 = or i32 %1, %3
  ret i32 %4
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

; Signed 32-bit integer to float conversion. One sitofp takes the i32
; argument, the other an i32 loaded from %a1; the two floats are added so
; that both conversions stay live.
define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
; GENERIC-LABEL: test_cvtsi2ss:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    cvtsi2ssl %edi, %xmm1
; GENERIC-NEXT:    cvtsi2ssl (%rsi), %xmm0
; GENERIC-NEXT:    addss %xmm1, %xmm0
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_cvtsi2ss:
; ATOM:       # BB#0:
; ATOM-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [6:3.00]
; ATOM-NEXT:    addss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtsi2ss:
; SLM:       # BB#0:
; SLM-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00]
; SLM-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [4:0.50]
; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtsi2ss:
; SANDY:       # BB#0:
; SANDY-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtsi2ss:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cvtsi2ss:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtsi2ss:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = sitofp i32 %a0 to float
  %2 = load i32, i32 *%a1, align 4
  %3 = sitofp i32 %2 to float
  %4 = fadd float %1, %3
  ret float %4
}

; Signed 64-bit integer to float conversion. One sitofp takes the i64
; argument, the other an i64 loaded from %a1; the two floats are added so
; that both conversions stay live.
define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
; GENERIC-LABEL: test_cvtsi2ssq:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    cvtsi2ssq %rdi, %xmm1
; GENERIC-NEXT:    cvtsi2ssq (%rsi), %xmm0
; GENERIC-NEXT:    addss %xmm1, %xmm0
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_cvtsi2ssq:
; ATOM:       # BB#0:
; ATOM-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [6:3.00]
; ATOM-NEXT:    addss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtsi2ssq:
; SLM:       # BB#0:
; SLM-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00]
; SLM-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50]
; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtsi2ssq:
; SANDY:       # BB#0:
; SANDY-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtsi2ssq:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cvtsi2ssq:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtsi2ssq:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = sitofp i64 %a0 to float
  %2 = load i64, i64 *%a1, align 8
  %3 = sitofp i64 %2 to float
  %4 = fadd float %1, %3
  ret float %4
}

; Float to 32-bit integer conversion via llvm.x86.sse.cvtss2si. The intrinsic
; is called once on the float argument and once on a float loaded from %a1
; (each inserted into lane 0 of an undef vector); the two i32 results are
; added.
define i32 @test_cvtss2si(float %a0, float *%a1) {
; GENERIC-LABEL: test_cvtss2si:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    cvtss2si %xmm0, %ecx
; GENERIC-NEXT:    cvtss2si (%rdi), %eax
; GENERIC-NEXT:    addl %ecx, %eax
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_cvtss2si:
; ATOM:       # BB#0:
; ATOM-NEXT:    cvtss2si (%rdi), %eax # sched: [9:4.50]
; ATOM-NEXT:    cvtss2si %xmm0, %ecx # sched: [8:4.00]
; ATOM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtss2si:
; SLM:       # BB#0:
; SLM-NEXT:    cvtss2si (%rdi), %eax # sched: [7:1.00]
; SLM-NEXT:    cvtss2si %xmm0, %ecx # sched: [4:0.50]
; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtss2si:
; SANDY:       # BB#0:
; SANDY-NEXT:    vcvtss2si %xmm0, %ecx # sched: [5:1.00]
; SANDY-NEXT:    vcvtss2si (%rdi), %eax # sched: [10:1.00]
; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtss2si:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vcvtss2si %xmm0, %ecx # sched: [4:1.00]
; HASWELL-NEXT:    vcvtss2si (%rdi), %eax # sched: [8:1.00]
; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cvtss2si:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vcvtss2si (%rdi), %eax # sched: [8:1.00]
; BTVER2-NEXT:    vcvtss2si %xmm0, %ecx # sched: [3:1.00]
; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtss2si:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vcvtss2si (%rdi), %eax # sched: [12:1.00]
; ZNVER1-NEXT:    vcvtss2si %xmm0, %ecx # sched: [5:1.00]
; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = insertelement <4 x float> undef, float %a0, i32 0
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1)
  %3 = load float, float *%a1, align 4
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4)
  %6 = add i32 %2, %5
  ret i32 %6
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; Float to 64-bit integer conversion via llvm.x86.sse.cvtss2si64. The
; intrinsic is called once on the float argument and once on a float loaded
; from %a1 (each inserted into lane 0 of an undef vector); the two i64
; results are added.
define i64 @test_cvtss2siq(float %a0, float *%a1) {
; GENERIC-LABEL: test_cvtss2siq:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    cvtss2si %xmm0, %rcx
; GENERIC-NEXT:    cvtss2si (%rdi), %rax
; GENERIC-NEXT:    addq %rcx, %rax
; GENERIC-NEXT:    retq
;
; ATOM-LABEL: test_cvtss2siq:
; ATOM:       # BB#0:
; ATOM-NEXT:    cvtss2si (%rdi), %rax # sched: [10:5.00]
; ATOM-NEXT:    cvtss2si %xmm0, %rcx # sched: [9:4.50]
; ATOM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtss2siq:
; SLM:       # BB#0:
; SLM-NEXT:    cvtss2si (%rdi), %rax # sched: [7:1.00]
; SLM-NEXT:    cvtss2si %xmm0, %rcx # sched: [4:0.50]
; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtss2siq:
; SANDY:       # BB#0:
; SANDY-NEXT:    vcvtss2si %xmm0, %rcx # sched: [5:1.00]
; SANDY-NEXT:    vcvtss2si (%rdi), %rax # sched: [10:1.00]
; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtss2siq:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vcvtss2si %xmm0, %rcx # sched: [4:1.00]
; HASWELL-NEXT:    vcvtss2si (%rdi), %rax # sched: [8:1.00]
; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cvtss2siq:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vcvtss2si (%rdi), %rax # sched: [8:1.00]
; BTVER2-NEXT:    vcvtss2si %xmm0, %rcx # sched: [3:1.00]
; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtss2siq:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vcvtss2si (%rdi), %rax # sched: [12:1.00]
; ZNVER1-NEXT:    vcvtss2si %xmm0, %rcx # sched: [5:1.00]
; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [5:0.50]
  %1 = insertelement <4 x float> undef, float %a0, i32 0
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1)
  %3 = load float, float *%a1, align 4
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4)
  %6 = add i64 %2, %5
  ret i64 %6
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

; Scheduling test for (v)cvttss2si producing a 32-bit result.
; %a0 is converted to i32 directly, the float loaded from %a1 is converted
; separately, and the add merges both results into the return value.
; The FileCheck assertions expect one register-source and one (%rdi)
; memory-source conversion for every CPU model.
677define i32 @test_cvttss2si(float %a0, float *%a1) {
678; GENERIC-LABEL: test_cvttss2si:
679; GENERIC: # BB#0:
680; GENERIC-NEXT: cvttss2si %xmm0, %ecx
681; GENERIC-NEXT: cvttss2si (%rdi), %eax
682; GENERIC-NEXT: addl %ecx, %eax
683; GENERIC-NEXT: retq
684;
685; ATOM-LABEL: test_cvttss2si:
686; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000687; ATOM-NEXT: cvttss2si (%rdi), %eax # sched: [9:4.50]
688; ATOM-NEXT: cvttss2si %xmm0, %ecx # sched: [8:4.00]
689; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
690; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000691;
692; SLM-LABEL: test_cvttss2si:
693; SLM: # BB#0:
694; SLM-NEXT: cvttss2si (%rdi), %eax # sched: [7:1.00]
695; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50]
696; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
697; SLM-NEXT: retq # sched: [4:1.00]
698;
699; SANDY-LABEL: test_cvttss2si:
700; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000701; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
702; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000703; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000704; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000705;
706; HASWELL-LABEL: test_cvttss2si:
707; HASWELL: # BB#0:
708; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000709; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000710; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000711; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000712;
713; BTVER2-LABEL: test_cvttss2si:
714; BTVER2: # BB#0:
715; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
716; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
717; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
718; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000719;
720; ZNVER1-LABEL: test_cvttss2si:
721; ZNVER1: # BB#0:
722; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00]
723; ZNVER1-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
724; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
725; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000726 %1 = fptosi float %a0 to i32 ; register-source conversion
727 %2 = load float, float *%a1, align 4 ; expected to fold into the memory-source conversion
728 %3 = fptosi float %2 to i32
729 %4 = add i32 %1, %3 ; merges both conversion results into the return value
730 ret i32 %4
731}
732
; Scheduling test for (v)cvttss2si producing a 64-bit result.
; %a0 is converted to i64 directly, the float loaded from %a1 is converted
; separately, and the add merges both results into the return value.
; The FileCheck assertions expect one register-source and one (%rdi)
; memory-source conversion into 64-bit registers for every CPU model.
733define i64 @test_cvttss2siq(float %a0, float *%a1) {
734; GENERIC-LABEL: test_cvttss2siq:
735; GENERIC: # BB#0:
736; GENERIC-NEXT: cvttss2si %xmm0, %rcx
737; GENERIC-NEXT: cvttss2si (%rdi), %rax
738; GENERIC-NEXT: addq %rcx, %rax
739; GENERIC-NEXT: retq
740;
741; ATOM-LABEL: test_cvttss2siq:
742; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000743; ATOM-NEXT: cvttss2si (%rdi), %rax # sched: [10:5.00]
744; ATOM-NEXT: cvttss2si %xmm0, %rcx # sched: [9:4.50]
745; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
746; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000747;
748; SLM-LABEL: test_cvttss2siq:
749; SLM: # BB#0:
750; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00]
751; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50]
752; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
753; SLM-NEXT: retq # sched: [4:1.00]
754;
755; SANDY-LABEL: test_cvttss2siq:
756; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000757; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
758; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000759; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000760; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000761;
762; HASWELL-LABEL: test_cvttss2siq:
763; HASWELL: # BB#0:
764; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000765; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000766; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000767; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000768;
769; BTVER2-LABEL: test_cvttss2siq:
770; BTVER2: # BB#0:
771; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
772; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
773; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
774; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000775;
776; ZNVER1-LABEL: test_cvttss2siq:
777; ZNVER1: # BB#0:
778; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00]
779; ZNVER1-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
780; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
781; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000782 %1 = fptosi float %a0 to i64 ; register-source conversion
783 %2 = load float, float *%a1, align 4 ; expected to fold into the memory-source conversion
784 %3 = fptosi float %2 to i64
785 %4 = add i64 %1, %3 ; merges both conversion results into the return value
786 ret i64 %4
787}
788
; Scheduling test for (v)divps.
; Divides %a0 by %a1, then divides that quotient by the vector loaded from
; %a2 (16-byte aligned). The FileCheck assertions expect one
; register-operand and one (%rdi) memory-operand divide for every CPU model.
789define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
790; GENERIC-LABEL: test_divps:
791; GENERIC: # BB#0:
792; GENERIC-NEXT: divps %xmm1, %xmm0
793; GENERIC-NEXT: divps (%rdi), %xmm0
794; GENERIC-NEXT: retq
795;
796; ATOM-LABEL: test_divps:
797; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000798; ATOM-NEXT: divps %xmm1, %xmm0 # sched: [70:35.00]
799; ATOM-NEXT: divps (%rdi), %xmm0 # sched: [125:62.50]
800; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000801;
802; SLM-LABEL: test_divps:
803; SLM: # BB#0:
804; SLM-NEXT: divps %xmm1, %xmm0 # sched: [34:34.00]
805; SLM-NEXT: divps (%rdi), %xmm0 # sched: [37:34.00]
806; SLM-NEXT: retq # sched: [4:1.00]
807;
808; SANDY-LABEL: test_divps:
809; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000810; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
811; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
812; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000813;
814; HASWELL-LABEL: test_divps:
815; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +0000816; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
817; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
818; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000819;
820; BTVER2-LABEL: test_divps:
821; BTVER2: # BB#0:
822; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
823; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
824; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000825;
826; ZNVER1-LABEL: test_divps:
827; ZNVER1: # BB#0:
828; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
829; ZNVER1-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
830; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000831 %1 = fdiv <4 x float> %a0, %a1 ; register-operand divide
832 %2 = load <4 x float>, <4 x float> *%a2, align 16 ; expected to fold into the second divide
833 %3 = fdiv <4 x float> %1, %2
834 ret <4 x float> %3
835}
836
; Scheduling test for (v)divss.
; Divides scalar %a0 by %a1, then divides that quotient by the float loaded
; from %a2. The FileCheck assertions expect one register-operand and one
; (%rdi) memory-operand divide for every CPU model.
837define float @test_divss(float %a0, float %a1, float *%a2) {
838; GENERIC-LABEL: test_divss:
839; GENERIC: # BB#0:
840; GENERIC-NEXT: divss %xmm1, %xmm0
841; GENERIC-NEXT: divss (%rdi), %xmm0
842; GENERIC-NEXT: retq
843;
844; ATOM-LABEL: test_divss:
845; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000846; ATOM-NEXT: divss %xmm1, %xmm0 # sched: [34:17.00]
847; ATOM-NEXT: divss (%rdi), %xmm0 # sched: [62:31.00]
848; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000849;
850; SLM-LABEL: test_divss:
851; SLM: # BB#0:
852; SLM-NEXT: divss %xmm1, %xmm0 # sched: [34:34.00]
853; SLM-NEXT: divss (%rdi), %xmm0 # sched: [37:34.00]
854; SLM-NEXT: retq # sched: [4:1.00]
855;
856; SANDY-LABEL: test_divss:
857; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000858; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
859; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
860; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000861;
862; HASWELL-LABEL: test_divss:
863; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +0000864; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
865; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
866; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000867;
868; BTVER2-LABEL: test_divss:
869; BTVER2: # BB#0:
870; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
871; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
872; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000873;
874; ZNVER1-LABEL: test_divss:
875; ZNVER1: # BB#0:
876; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
877; ZNVER1-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
878; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000879 %1 = fdiv float %a0, %a1 ; register-operand divide
880 %2 = load float, float *%a2, align 4 ; expected to fold into the second divide
881 %3 = fdiv float %1, %2
882 ret float %3
883}
884
; Scheduling test for (v)ldmxcsr.
; %a0 is stored into a fresh stack slot and the address of that slot is
; passed to @llvm.x86.sse.ldmxcsr. The FileCheck assertions expect a movl
; of %edi to the stack followed by an (v)ldmxcsr from a stack slot for every
; CPU model.
885define void @test_ldmxcsr(i32 %a0) {
886; GENERIC-LABEL: test_ldmxcsr:
887; GENERIC: # BB#0:
888; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
889; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
890; GENERIC-NEXT: retq
891;
892; ATOM-LABEL: test_ldmxcsr:
893; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000894; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
895; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:2.50]
896; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000897;
898; SLM-LABEL: test_ldmxcsr:
899; SLM: # BB#0:
900; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
901; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
902; SLM-NEXT: retq # sched: [4:1.00]
903;
904; SANDY-LABEL: test_ldmxcsr:
905; SANDY: # BB#0:
906; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000907; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
908; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000909;
910; HASWELL-LABEL: test_ldmxcsr:
911; HASWELL: # BB#0:
912; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000913; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [6:1.00]
914; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000915;
916; BTVER2-LABEL: test_ldmxcsr:
917; BTVER2: # BB#0:
918; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
919; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
920; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000921;
922; ZNVER1-LABEL: test_ldmxcsr:
923; ZNVER1: # BB#0:
924; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
925; ZNVER1-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [8:0.50]
926; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000927 %1 = alloca i32, align 4 ; stack slot the intrinsic reads from
928 %2 = bitcast i32* %1 to i8*
929 store i32 %a0, i32* %1
930 call void @llvm.x86.sse.ldmxcsr(i8* %2)
931 ret void
932}
; Not marked readnone: the intrinsic reads the i32 behind its pointer argument
; and updates MXCSR. Declaring it readnone would let IR-level passes treat the
; store above and the call itself as dead.
933declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
934
; Scheduling test for (v)maxps.
; Calls @llvm.x86.sse.max.ps on %a0 and %a1, then again on that result and
; the vector loaded from %a2 (16-byte aligned). The FileCheck assertions
; expect one register-operand and one (%rdi) memory-operand instruction for
; every CPU model.
935define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
936; GENERIC-LABEL: test_maxps:
937; GENERIC: # BB#0:
938; GENERIC-NEXT: maxps %xmm1, %xmm0
939; GENERIC-NEXT: maxps (%rdi), %xmm0
940; GENERIC-NEXT: retq
941;
942; ATOM-LABEL: test_maxps:
943; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000944; ATOM-NEXT: maxps %xmm1, %xmm0 # sched: [5:5.00]
945; ATOM-NEXT: maxps (%rdi), %xmm0 # sched: [5:5.00]
946; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000947;
948; SLM-LABEL: test_maxps:
949; SLM: # BB#0:
950; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
951; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00]
952; SLM-NEXT: retq # sched: [4:1.00]
953;
954; SANDY-LABEL: test_maxps:
955; SANDY: # BB#0:
956; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000957; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
958; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000959;
960; HASWELL-LABEL: test_maxps:
961; HASWELL: # BB#0:
962; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000963; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
964; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000965;
966; BTVER2-LABEL: test_maxps:
967; BTVER2: # BB#0:
968; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
969; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
970; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000971;
972; ZNVER1-LABEL: test_maxps:
973; ZNVER1: # BB#0:
974; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
975; ZNVER1-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
976; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000977 %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; register-operand form
978 %2 = load <4 x float>, <4 x float> *%a2, align 16 ; expected to fold into the second call's instruction
979 %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2)
980 ret <4 x float> %3
981}
; Intrinsic used by test_maxps.
982declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
983
; Scheduling test for (v)maxss.
; Calls @llvm.x86.sse.max.ss on %a0 and %a1, then again on that result and
; the vector loaded from %a2 (16-byte aligned). The FileCheck assertions
; expect one register-operand and one (%rdi) memory-operand instruction for
; every CPU model.
984define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
985; GENERIC-LABEL: test_maxss:
986; GENERIC: # BB#0:
987; GENERIC-NEXT: maxss %xmm1, %xmm0
988; GENERIC-NEXT: maxss (%rdi), %xmm0
989; GENERIC-NEXT: retq
990;
991; ATOM-LABEL: test_maxss:
992; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000993; ATOM-NEXT: maxss %xmm1, %xmm0 # sched: [5:5.00]
994; ATOM-NEXT: maxss (%rdi), %xmm0 # sched: [5:5.00]
995; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000996;
997; SLM-LABEL: test_maxss:
998; SLM: # BB#0:
999; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
1000; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00]
1001; SLM-NEXT: retq # sched: [4:1.00]
1002;
1003; SANDY-LABEL: test_maxss:
1004; SANDY: # BB#0:
1005; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001006; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1007; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001008;
1009; HASWELL-LABEL: test_maxss:
1010; HASWELL: # BB#0:
1011; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001012; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
1013; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001014;
1015; BTVER2-LABEL: test_maxss:
1016; BTVER2: # BB#0:
1017; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1018; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1019; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001020;
1021; ZNVER1-LABEL: test_maxss:
1022; ZNVER1: # BB#0:
1023; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1024; ZNVER1-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
1025; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001026 %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; register-operand form
1027 %2 = load <4 x float>, <4 x float> *%a2, align 16 ; expected to fold into the second call's instruction
1028 %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
1029 ret <4 x float> %3
1030}
; Intrinsic used by test_maxss.
1031declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
1032
; Scheduling test for (v)minps.
; Calls @llvm.x86.sse.min.ps on %a0 and %a1, then again on that result and
; the vector loaded from %a2 (16-byte aligned). The FileCheck assertions
; expect one register-operand and one (%rdi) memory-operand instruction for
; every CPU model.
1033define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1034; GENERIC-LABEL: test_minps:
1035; GENERIC: # BB#0:
1036; GENERIC-NEXT: minps %xmm1, %xmm0
1037; GENERIC-NEXT: minps (%rdi), %xmm0
1038; GENERIC-NEXT: retq
1039;
1040; ATOM-LABEL: test_minps:
1041; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001042; ATOM-NEXT: minps %xmm1, %xmm0 # sched: [5:5.00]
1043; ATOM-NEXT: minps (%rdi), %xmm0 # sched: [5:5.00]
1044; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001045;
1046; SLM-LABEL: test_minps:
1047; SLM: # BB#0:
1048; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
1049; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00]
1050; SLM-NEXT: retq # sched: [4:1.00]
1051;
1052; SANDY-LABEL: test_minps:
1053; SANDY: # BB#0:
1054; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001055; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1056; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001057;
1058; HASWELL-LABEL: test_minps:
1059; HASWELL: # BB#0:
1060; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001061; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
1062; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001063;
1064; BTVER2-LABEL: test_minps:
1065; BTVER2: # BB#0:
1066; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1067; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1068; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001069;
1070; ZNVER1-LABEL: test_minps:
1071; ZNVER1: # BB#0:
1072; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1073; ZNVER1-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
1074; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001075 %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; register-operand form
1076 %2 = load <4 x float>, <4 x float> *%a2, align 16 ; expected to fold into the second call's instruction
1077 %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2)
1078 ret <4 x float> %3
1079}
; Intrinsic used by test_minps.
1080declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
1081
; Scheduling test for (v)minss.
; Calls @llvm.x86.sse.min.ss on %a0 and %a1, then again on that result and
; the vector loaded from %a2 (16-byte aligned). The FileCheck assertions
; expect one register-operand and one (%rdi) memory-operand instruction for
; every CPU model.
1082define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1083; GENERIC-LABEL: test_minss:
1084; GENERIC: # BB#0:
1085; GENERIC-NEXT: minss %xmm1, %xmm0
1086; GENERIC-NEXT: minss (%rdi), %xmm0
1087; GENERIC-NEXT: retq
1088;
1089; ATOM-LABEL: test_minss:
1090; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001091; ATOM-NEXT: minss %xmm1, %xmm0 # sched: [5:5.00]
1092; ATOM-NEXT: minss (%rdi), %xmm0 # sched: [5:5.00]
1093; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001094;
1095; SLM-LABEL: test_minss:
1096; SLM: # BB#0:
1097; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
1098; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00]
1099; SLM-NEXT: retq # sched: [4:1.00]
1100;
1101; SANDY-LABEL: test_minss:
1102; SANDY: # BB#0:
1103; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001104; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1105; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001106;
1107; HASWELL-LABEL: test_minss:
1108; HASWELL: # BB#0:
1109; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001110; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
1111; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001112;
1113; BTVER2-LABEL: test_minss:
1114; BTVER2: # BB#0:
1115; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1116; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1117; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001118;
1119; ZNVER1-LABEL: test_minss:
1120; ZNVER1: # BB#0:
1121; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1122; ZNVER1-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
1123; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001124 %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; register-operand form
1125 %2 = load <4 x float>, <4 x float> *%a2, align 16 ; expected to fold into the second call's instruction
1126 %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
1127 ret <4 x float> %3
1128}
; Intrinsic used by test_minss.
1129declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
1130
; Scheduling test for (v)movaps in both load and store form.
; Loads a 16-byte-aligned vector from %a0, adds it to itself, and stores the
; sum 16-byte-aligned to %a1. The FileCheck assertions expect an aligned
; load, an add and an aligned store for every CPU model.
; NOTE(review): the fadd presumably exists so the load and store are emitted
; as separate vector moves - confirm.
1131define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
1132; GENERIC-LABEL: test_movaps:
1133; GENERIC: # BB#0:
1134; GENERIC-NEXT: movaps (%rdi), %xmm0
1135; GENERIC-NEXT: addps %xmm0, %xmm0
1136; GENERIC-NEXT: movaps %xmm0, (%rsi)
1137; GENERIC-NEXT: retq
1138;
1139; ATOM-LABEL: test_movaps:
1140; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001141; ATOM-NEXT: movaps (%rdi), %xmm0 # sched: [1:1.00]
1142; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
1143; ATOM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1144; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001145;
1146; SLM-LABEL: test_movaps:
1147; SLM: # BB#0:
1148; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00]
1149; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1150; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1151; SLM-NEXT: retq # sched: [4:1.00]
1152;
1153; SANDY-LABEL: test_movaps:
1154; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001155; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001156; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001157; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00]
1158; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001159;
1160; HASWELL-LABEL: test_movaps:
1161; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00001162; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001163; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001164; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1165; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001166;
1167; BTVER2-LABEL: test_movaps:
1168; BTVER2: # BB#0:
1169; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
1170; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1171; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1172; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001173;
1174; ZNVER1-LABEL: test_movaps:
1175; ZNVER1: # BB#0:
1176; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50]
1177; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1178; ZNVER1-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:0.50]
1179; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001180 %1 = load <4 x float>, <4 x float> *%a0, align 16 ; aligned load form
1181 %2 = fadd <4 x float> %1, %1
1182 store <4 x float> %2, <4 x float> *%a1, align 16 ; aligned store form
1183 ret void
1184}
1185
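1186; TODO (v)movhlps - the AVX targets below currently select vunpckhpd for this shuffle, so vmovhlps itself is not yet covered.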
1187
; Scheduling test aimed at (v)movhlps.
; The shuffle builds a vector from elements 2 and 3 of %a1 followed by
; elements 2 and 3 of %a0. The FileCheck assertions expect movhlps on the
; non-AVX CPU models and vunpckhpd on the AVX ones.
1188define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
1189; GENERIC-LABEL: test_movhlps:
1190; GENERIC: # BB#0:
1191; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1192; GENERIC-NEXT: retq
1193;
1194; ATOM-LABEL: test_movhlps:
1195; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001196; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1197; ATOM-NEXT: nop # sched: [1:0.50]
1198; ATOM-NEXT: nop # sched: [1:0.50]
1199; ATOM-NEXT: nop # sched: [1:0.50]
1200; ATOM-NEXT: nop # sched: [1:0.50]
1201; ATOM-NEXT: nop # sched: [1:0.50]
1202; ATOM-NEXT: nop # sched: [1:0.50]
1203; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001204;
1205; SLM-LABEL: test_movhlps:
1206; SLM: # BB#0:
1207; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1208; SLM-NEXT: retq # sched: [4:1.00]
1209;
1210; SANDY-LABEL: test_movhlps:
1211; SANDY: # BB#0:
1212; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001213; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001214;
1215; HASWELL-LABEL: test_movhlps:
1216; HASWELL: # BB#0:
1217; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001218; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001219;
1220; BTVER2-LABEL: test_movhlps:
1221; BTVER2: # BB#0:
1222; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
1223; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001224;
1225; ZNVER1-LABEL: test_movhlps:
1226; ZNVER1: # BB#0:
1227; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
1228; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001229 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> ; indices 6,7 = %a1[2..3]; 2,3 = %a0[2..3]
1230 ret <4 x float> %1
1231}
1232
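1233; TODO (v)movhps - every target below currently selects (v)movhpd for the load, so (v)movhps itself is not yet covered.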
1234
; Scheduling test aimed at (v)movhps in load and store form.
; Two floats are loaded through %a2 (reinterpreted as <2 x float>*) and
; placed in elements 2 and 3 of %a1; the result is added to %a0, and
; elements 2 and 3 of the sum are stored back through the same pointer.
; The FileCheck assertions expect a (v)movhpd load, an add, and a store of
; the upper 64 bits (movhlps + movlps, or (v)pextrq $1, depending on the CPU
; model).
1235define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1236; GENERIC-LABEL: test_movhps:
1237; GENERIC: # BB#0:
1238; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
1239; GENERIC-NEXT: addps %xmm0, %xmm1
1240; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
1241; GENERIC-NEXT: movlps %xmm1, (%rdi)
1242; GENERIC-NEXT: retq
1243;
1244; ATOM-LABEL: test_movhps:
1245; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001246; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
1247; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1248; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
1249; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1250; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001251;
1252; SLM-LABEL: test_movhps:
1253; SLM: # BB#0:
1254; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
1255; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1256; SLM-NEXT: pextrq $1, %xmm1, (%rdi) # sched: [4:2.00]
1257; SLM-NEXT: retq # sched: [4:1.00]
1258;
1259; SANDY-LABEL: test_movhps:
1260; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001261; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001262; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1263; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001264; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001265;
1266; HASWELL-LABEL: test_movhps:
1267; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00001268; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001269; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001270; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
1271; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001272;
1273; BTVER2-LABEL: test_movhps:
1274; BTVER2: # BB#0:
1275; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1276; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1277; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
1278; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001279;
1280; ZNVER1-LABEL: test_movhps:
1281; ZNVER1: # BB#0:
1282; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
1283; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1284; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [8:1.00]
1285; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001286 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1287 %2 = load <2 x float>, <2 x float> *%1, align 8
1288 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; widen to 4 elements; elements 2,3 come from undef
1289 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ; %a1[0..1] followed by the two loaded floats
1290 %5 = fadd <4 x float> %a0, %4
1291 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; take elements 2,3 of the sum
1292 store <2 x float> %6, <2 x float>* %1
1293 ret void
1294}
1295
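1296; TODO (v)movlhps - every target below currently selects (v)unpcklpd for this shuffle, so (v)movlhps itself is not yet covered.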
1297
; Scheduling test aimed at (v)movlhps.
; The shuffle builds a vector from elements 0 and 1 of %a0 followed by
; elements 0 and 1 of %a1, and the result is added to %a1. The FileCheck
; assertions expect (v)unpcklpd followed by an add for every CPU model.
1298define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
1299; GENERIC-LABEL: test_movlhps:
1300; GENERIC: # BB#0:
1301; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1302; GENERIC-NEXT: addps %xmm1, %xmm0
1303; GENERIC-NEXT: retq
1304;
1305; ATOM-LABEL: test_movlhps:
1306; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001307; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1308; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
1309; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001310;
1311; SLM-LABEL: test_movlhps:
1312; SLM: # BB#0:
1313; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1314; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1315; SLM-NEXT: retq # sched: [4:1.00]
1316;
1317; SANDY-LABEL: test_movlhps:
1318; SANDY: # BB#0:
1319; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1320; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001321; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001322;
1323; HASWELL-LABEL: test_movlhps:
1324; HASWELL: # BB#0:
1325; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1326; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001327; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001328;
1329; BTVER2-LABEL: test_movlhps:
1330; BTVER2: # BB#0:
1331; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
1332; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1333; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001334;
1335; ZNVER1-LABEL: test_movlhps:
1336; ZNVER1: # BB#0:
1337; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
1338; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1339; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001340 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ; indices 0,1 = %a0[0..1]; 4,5 = %a1[0..1]
1341 %2 = fadd <4 x float> %a1, %1
1342 ret <4 x float> %2
1343}
1344
; Scheduling test aimed at (v)movlps in load and store form.
; Two floats are loaded through %a2 (reinterpreted as <2 x float>*) and
; placed in elements 0 and 1 of %a1; the result is added to %a0, and
; elements 0 and 1 of the sum are stored back through the same pointer.
; The FileCheck assertions expect a (v)movlpd load, an add, and a (v)movlps
; store for every CPU model.
1345define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1346; GENERIC-LABEL: test_movlps:
1347; GENERIC: # BB#0:
1348; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
1349; GENERIC-NEXT: addps %xmm0, %xmm1
1350; GENERIC-NEXT: movlps %xmm1, (%rdi)
1351; GENERIC-NEXT: retq
1352;
1353; ATOM-LABEL: test_movlps:
1354; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001355; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
1356; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1357; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1358; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001359;
1360; SLM-LABEL: test_movlps:
1361; SLM: # BB#0:
1362; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
1363; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1364; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1365; SLM-NEXT: retq # sched: [4:1.00]
1366;
1367; SANDY-LABEL: test_movlps:
1368; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001369; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001370; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001371; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00]
1372; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001373;
1374; HASWELL-LABEL: test_movlps:
1375; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00001376; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001377; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001378; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1379; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001380;
1381; BTVER2-LABEL: test_movlps:
1382; BTVER2: # BB#0:
1383; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1384; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1385; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1386; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001387;
1388; ZNVER1-LABEL: test_movlps:
1389; ZNVER1: # BB#0:
1390; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
1391; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1392; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50]
1393; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001394 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1395 %2 = load <2 x float>, <2 x float> *%1, align 8
1396 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; widen to 4 elements; elements 2,3 come from undef
1397 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3> ; the two loaded floats followed by %a1[2..3]
1398 %5 = fadd <4 x float> %a0, %4
1399 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; take elements 0,1 of the sum
1400 store <2 x float> %6, <2 x float>* %1
1401 ret void
1402}
1403
; Scheduling test for MOVMSKPS.
; Calls the llvm.x86.sse.movmsk.ps intrinsic on %a0 and returns its i32 result;
; the assertions below expect a single movmskps/vmovmskps into %eax.
define i32 @test_movmskps(<4 x float> %a0) {
; GENERIC-LABEL: test_movmskps:
; GENERIC: # BB#0:
; GENERIC-NEXT: movmskps %xmm0, %eax
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_movmskps:
; ATOM: # BB#0:
; ATOM-NEXT: movmskps %xmm0, %eax # sched: [3:3.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movmskps:
; SLM: # BB#0:
; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movmskps:
; SANDY: # BB#0:
; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movmskps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movmskps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %1
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
1445
; Scheduling test for MOVNTPS.
; Stores %a0 through %a1 with 16-byte alignment and !nontemporal metadata; the
; assertions below expect a single movntps/vmovntps store.
; The metadata node !0 is defined outside this part of the file.
define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_movntps:
; GENERIC: # BB#0:
; GENERIC-NEXT: movntps %xmm0, (%rdi)
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_movntps:
; ATOM: # BB#0:
; ATOM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movntps:
; SLM: # BB#0:
; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movntps:
; SANDY: # BB#0:
; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movntps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_movntps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movntps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0
  ret void
}
1490
; Scheduling test for the memory forms of MOVSS (load and store).
; Loads a float through %a0, adds it to itself and stores the sum through %a1;
; the assertions below expect a movss load, an addss and a movss store (or the
; v-prefixed equivalents).
define void @test_movss_mem(float* %a0, float* %a1) {
; GENERIC-LABEL: test_movss_mem:
; GENERIC: # BB#0:
; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; GENERIC-NEXT: addss %xmm0, %xmm0
; GENERIC-NEXT: movss %xmm0, (%rsi)
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_movss_mem:
; ATOM: # BB#0:
; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00]
; ATOM-NEXT: addss %xmm0, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movss_mem:
; SLM: # BB#0:
; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movss_mem:
; SANDY: # BB#0:
; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movss_mem:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_movss_mem:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movss_mem:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
; ZNVER1-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmovss %xmm0, (%rsi) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = load float, float* %a0, align 1
  %2 = fadd float %1, %1
  store float %2, float *%a1, align 1
  ret void
}
1545
; Scheduling test for the register form of MOVSS.
; Builds a vector with lane 0 taken from %a1 and lanes 1-3 taken from %a0.
; The assertions below expect movss for the default-CPU and Atom runs and
; blendps/vblendps for the other CPU runs.
define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
; GENERIC-LABEL: test_movss_reg:
; GENERIC: # BB#0:
; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_movss_reg:
; ATOM: # BB#0:
; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movss_reg:
; SLM: # BB#0:
; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movss_reg:
; SANDY: # BB#0:
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movss_reg:
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_movss_reg:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movss_reg:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %1
}
1590
; Scheduling test for MOVUPS (unaligned vector load and store).
; Loads a <4 x float> through %a0 with align 1, adds it to itself and stores
; the sum through %a1 with align 1; the assertions below expect a movups load,
; an addps and a movups store (or the v-prefixed equivalents).
define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_movups:
; GENERIC: # BB#0:
; GENERIC-NEXT: movups (%rdi), %xmm0
; GENERIC-NEXT: addps %xmm0, %xmm0
; GENERIC-NEXT: movups %xmm0, (%rsi)
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_movups:
; ATOM: # BB#0:
; ATOM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.50]
; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: movups %xmm0, (%rsi) # sched: [2:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movups:
; SLM: # BB#0:
; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00]
; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movups:
; SANDY: # BB#0:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movups:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_movups:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movups:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmovups %xmm0, (%rsi) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = load <4 x float>, <4 x float> *%a0, align 1
  %2 = fadd <4 x float> %1, %1
  store <4 x float> %2, <4 x float> *%a1, align 1
  ret void
}
1645
; Scheduling test for MULPS in both register and memory-operand forms.
; Multiplies %a0 by %a1, then multiplies the product by a <4 x float> loaded
; through %a2 (align 16); the assertions below expect the load to be folded
; into the second mulps/vmulps.
define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_mulps:
; GENERIC: # BB#0:
; GENERIC-NEXT: mulps %xmm1, %xmm0
; GENERIC-NEXT: mulps (%rdi), %xmm0
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_mulps:
; ATOM: # BB#0:
; ATOM-NEXT: mulps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: mulps (%rdi), %xmm0 # sched: [10:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_mulps:
; SLM: # BB#0:
; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00]
; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_mulps:
; SANDY: # BB#0:
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_mulps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_mulps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_mulps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = fmul <4 x float> %a0, %a1
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = fmul <4 x float> %1, %2
  ret <4 x float> %3
}
1693
; Scheduling test for MULSS in both register and memory-operand forms.
; Multiplies %a0 by %a1, then multiplies the product by a float loaded through
; %a2 (align 4); the assertions below expect the load to be folded into the
; second mulss/vmulss.
define float @test_mulss(float %a0, float %a1, float *%a2) {
; GENERIC-LABEL: test_mulss:
; GENERIC: # BB#0:
; GENERIC-NEXT: mulss %xmm1, %xmm0
; GENERIC-NEXT: mulss (%rdi), %xmm0
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_mulss:
; ATOM: # BB#0:
; ATOM-NEXT: mulss %xmm1, %xmm0 # sched: [4:4.00]
; ATOM-NEXT: mulss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_mulss:
; SLM: # BB#0:
; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00]
; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_mulss:
; SANDY: # BB#0:
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_mulss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_mulss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_mulss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = fmul float %a0, %a1
  %2 = load float, float *%a2, align 4
  %3 = fmul float %1, %2
  ret float %3
}
1741
; Scheduling test for ORPS in both register and memory-operand forms.
; Bitcasts the float vectors to <4 x i32>, ORs %a0 with %a1, ORs that with a
; vector loaded through %a2 (align 16) and bitcasts the result back to
; <4 x float>; the assertions below expect two orps/vorps with the load folded
; into the second one.
; The Atom assertions for the two orps instructions carry no sched annotation.
define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_orps:
; GENERIC: # BB#0:
; GENERIC-NEXT: orps %xmm1, %xmm0
; GENERIC-NEXT: orps (%rdi), %xmm0
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_orps:
; ATOM: # BB#0:
; ATOM-NEXT: orps %xmm1, %xmm0
; ATOM-NEXT: orps (%rdi), %xmm0
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_orps:
; SLM: # BB#0:
; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_orps:
; SANDY: # BB#0:
; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_orps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_orps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_orps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = bitcast <4 x float> %a0 to <4 x i32>
  %2 = bitcast <4 x float> %a1 to <4 x i32>
  %3 = or <4 x i32> %1, %2
  %4 = load <4 x float>, <4 x float> *%a2, align 16
  %5 = bitcast <4 x float> %4 to <4 x i32>
  %6 = or <4 x i32> %3, %5
  %7 = bitcast <4 x i32> %6 to <4 x float>
  ret <4 x float> %7
}
1801
; Scheduling test for PREFETCHNTA.
; Calls llvm.prefetch on %a0 with the trailing arguments (0, 0, 1); in the
; LangRef operand order (rw, locality, cache type) that is a read prefetch with
; no temporal locality on the data cache. The assertions below expect a single
; prefetchnta on every CPU run.
define void @test_prefetchnta(i8* %a0) {
; GENERIC-LABEL: test_prefetchnta:
; GENERIC: # BB#0:
; GENERIC-NEXT: prefetchnta (%rdi)
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_prefetchnta:
; ATOM: # BB#0:
; ATOM-NEXT: prefetchnta (%rdi) # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_prefetchnta:
; SLM: # BB#0:
; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_prefetchnta:
; SANDY: # BB#0:
; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_prefetchnta:
; HASWELL: # BB#0:
; HASWELL-NEXT: prefetchnta (%rdi) # sched: [4:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_prefetchnta:
; BTVER2: # BB#0:
; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_prefetchnta:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: prefetchnta (%rdi) # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
  ret void
}
declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone
1847
; Scheduling test for RCPPS in both register and memory-operand forms.
; Applies llvm.x86.sse.rcp.ps to %a0 and to a <4 x float> loaded through %a1
; (align 16), then adds the two results; the assertions below expect the load
; to be folded into one of the two rcpps/vrcpps instructions.
define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_rcpps:
; GENERIC: # BB#0:
; GENERIC-NEXT: rcpps %xmm0, %xmm1
; GENERIC-NEXT: rcpps (%rdi), %xmm0
; GENERIC-NEXT: addps %xmm1, %xmm0
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_rcpps:
; ATOM: # BB#0:
; ATOM-NEXT: rcpps (%rdi), %xmm1 # sched: [10:5.00]
; ATOM-NEXT: rcpps %xmm0, %xmm0 # sched: [9:4.50]
; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_rcpps:
; SLM: # BB#0:
; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00]
; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00]
; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_rcpps:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [7:3.00]
; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_rcpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_rcpps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_rcpps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50]
; ZNVER1-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  %2 = load <4 x float>, <4 x float> *%a1, align 16
  %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2)
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
1906
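; TODO - rcpss_m: the load in test_rcpss is emitted as a separate movss, so rcpss with a memory operand is not covered yet.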
1908
; Scheduling test for RCPSS (register form).
; Inserts %a0 into lane 0 of an undef vector and applies llvm.x86.sse.rcp.ss,
; does the same for a float loaded through %a1 (align 4), then adds the two
; vector results. The assertions below expect the load as a separate
; movss/vmovss followed by register-to-register rcpss/vrcpss; the load is not
; folded into rcpss on any CPU run.
; The Atom assertions for the two rcpss instructions carry no sched annotation.
define <4 x float> @test_rcpss(float %a0, float *%a1) {
; GENERIC-LABEL: test_rcpss:
; GENERIC: # BB#0:
; GENERIC-NEXT: rcpss %xmm0, %xmm0
; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; GENERIC-NEXT: rcpss %xmm1, %xmm1
; GENERIC-NEXT: addps %xmm1, %xmm0
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_rcpss:
; ATOM: # BB#0:
; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
; ATOM-NEXT: rcpss %xmm0, %xmm0
; ATOM-NEXT: rcpss %xmm1, %xmm1
; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_rcpss:
; SLM: # BB#0:
; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [8:1.00]
; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [8:1.00]
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_rcpss:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_rcpss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_rcpss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_rcpss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
; ZNVER1-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [12:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = insertelement <4 x float> undef, float %a0, i32 0
  %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1)
  %3 = load float, float *%a1, align 4
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
  %6 = fadd <4 x float> %2, %5
  ret <4 x float> %6
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
1974
; Scheduling test for RSQRTPS in both register and memory-operand forms.
; Applies llvm.x86.sse.rsqrt.ps to %a0 and to a <4 x float> loaded through %a1
; (align 16), then adds the two results; the assertions below expect the load
; to be folded into one of the two rsqrtps/vrsqrtps instructions.
define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_rsqrtps:
; GENERIC: # BB#0:
; GENERIC-NEXT: rsqrtps %xmm0, %xmm1
; GENERIC-NEXT: rsqrtps (%rdi), %xmm0
; GENERIC-NEXT: addps %xmm1, %xmm0
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test_rsqrtps:
; ATOM: # BB#0:
; ATOM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [10:5.00]
; ATOM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [9:4.50]
; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_rsqrtps:
; SLM: # BB#0:
; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00]
; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00]
; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_rsqrtps:
; SANDY: # BB#0:
; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_rsqrtps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_rsqrtps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_rsqrtps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50]
; ZNVER1-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
  %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  %2 = load <4 x float>, <4 x float> *%a1, align 16
  %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2)
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
2033
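; TODO - rsqrtss_m: the load in test_rsqrtss is emitted as a separate movss, so rsqrtss with a memory operand is not covered yet.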
2035
; Applies @llvm.x86.sse.rsqrt.ss to %a0 and to a float loaded from %a1 (each
; inserted into lane 0 of an undef vector) and returns the fadd of both results.
; In every CPU variant asserted below the load is a separate movss/vmovss and
; both rsqrtss/vrsqrtss instructions use register operands only.
2036define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
2037; GENERIC-LABEL: test_rsqrtss:
2038; GENERIC: # BB#0:
2039; GENERIC-NEXT: rsqrtss %xmm0, %xmm0
2040; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2041; GENERIC-NEXT: rsqrtss %xmm1, %xmm1
2042; GENERIC-NEXT: addps %xmm1, %xmm0
2043; GENERIC-NEXT: retq
2044;
2045; ATOM-LABEL: test_rsqrtss:
2046; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002047; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002048; ATOM-NEXT: rsqrtss %xmm0, %xmm0
2049; ATOM-NEXT: rsqrtss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002050; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2051; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002052;
2053; SLM-LABEL: test_rsqrtss:
2054; SLM: # BB#0:
2055; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
2056; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [8:1.00]
2057; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [8:1.00]
2058; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2059; SLM-NEXT: retq # sched: [4:1.00]
2060;
2061; SANDY-LABEL: test_rsqrtss:
2062; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002063; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2064; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2065; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002066; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002067; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002068;
2069; HASWELL-LABEL: test_rsqrtss:
2070; HASWELL: # BB#0:
2071; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002072; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002073; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2074; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002075; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002076;
2077; BTVER2-LABEL: test_rsqrtss:
2078; BTVER2: # BB#0:
2079; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
2080; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
2081; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
2082; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2083; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002084;
2085; ZNVER1-LABEL: test_rsqrtss:
2086; ZNVER1: # BB#0:
2087; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
2088; ZNVER1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:0.50]
2089; ZNVER1-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:0.50]
2090; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2091; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002092 %1 = insertelement <4 x float> undef, float %a0, i32 0
2093 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1)
2094 %3 = load float, float *%a1, align 4
2095 %4 = insertelement <4 x float> undef, float %3, i32 0
2096 %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
2097 %6 = fadd <4 x float> %2, %5
2098 ret <4 x float> %6
2099}
; Declaration of the SSE intrinsic called twice by test_rsqrtss above.
2100declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
2101
; Calls @llvm.x86.sse.sfence and returns void. Every CPU variant asserts a
; single sfence ahead of retq; the Atom variant additionally expects six nop
; instructions between the two.
2102define void @test_sfence() {
2103; GENERIC-LABEL: test_sfence:
2104; GENERIC: # BB#0:
2105; GENERIC-NEXT: sfence
2106; GENERIC-NEXT: retq
2107;
2108; ATOM-LABEL: test_sfence:
2109; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002110; ATOM-NEXT: sfence # sched: [1:1.00]
2111; ATOM-NEXT: nop # sched: [1:0.50]
2112; ATOM-NEXT: nop # sched: [1:0.50]
2113; ATOM-NEXT: nop # sched: [1:0.50]
2114; ATOM-NEXT: nop # sched: [1:0.50]
2115; ATOM-NEXT: nop # sched: [1:0.50]
2116; ATOM-NEXT: nop # sched: [1:0.50]
2117; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002118;
2119; SLM-LABEL: test_sfence:
2120; SLM: # BB#0:
2121; SLM-NEXT: sfence # sched: [1:1.00]
2122; SLM-NEXT: retq # sched: [4:1.00]
2123;
2124; SANDY-LABEL: test_sfence:
2125; SANDY: # BB#0:
2126; SANDY-NEXT: sfence # sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002127; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002128;
2129; HASWELL-LABEL: test_sfence:
2130; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00002131; HASWELL-NEXT: sfence # sched: [1:1.00]
2132; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002133;
2134; BTVER2-LABEL: test_sfence:
2135; BTVER2: # BB#0:
2136; BTVER2-NEXT: sfence # sched: [1:1.00]
2137; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002138;
2139; ZNVER1-LABEL: test_sfence:
2140; ZNVER1: # BB#0:
2141; ZNVER1-NEXT: sfence # sched: [1:0.50]
2142; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002143 call void @llvm.x86.sse.sfence()
2144 ret void
2145}
; Declaration of the SSE intrinsic called by test_sfence above.
; NOTE(review): `readnone` on a store fence looks suspicious; presumably the
; attributes written here are not what governs the intrinsic - confirm.
2146declare void @llvm.x86.sse.sfence() nounwind readnone
2147
; Two chained shufflevector operations: the first selects lanes <0,0,4,4> from
; %a0/%a1, the second selects lanes <0,3,4,4> from that result and a vector
; loaded from %a2 (align 16). Each CPU variant asserts a register-form
; shufps/vshufps followed by one that takes the load as a memory operand; the
; Atom variant also expects four nop instructions before retq.
2148define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
2149; GENERIC-LABEL: test_shufps:
2150; GENERIC: # BB#0:
2151; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
2152; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0]
2153; GENERIC-NEXT: retq
2154;
2155; ATOM-LABEL: test_shufps:
2156; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002157; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2158; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2159; ATOM-NEXT: nop # sched: [1:0.50]
2160; ATOM-NEXT: nop # sched: [1:0.50]
2161; ATOM-NEXT: nop # sched: [1:0.50]
2162; ATOM-NEXT: nop # sched: [1:0.50]
2163; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002164;
2165; SLM-LABEL: test_shufps:
2166; SLM: # BB#0:
2167; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2168; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [4:1.00]
2169; SLM-NEXT: retq # sched: [4:1.00]
2170;
2171; SANDY-LABEL: test_shufps:
2172; SANDY: # BB#0:
2173; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002174; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2175; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002176;
2177; HASWELL-LABEL: test_shufps:
2178; HASWELL: # BB#0:
2179; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002180; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
2181; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002182;
2183; BTVER2-LABEL: test_shufps:
2184; BTVER2: # BB#0:
2185; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2186; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
2187; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002188;
2189; ZNVER1-LABEL: test_shufps:
2190; ZNVER1: # BB#0:
2191; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2192; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [8:0.50]
2193; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002194 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
2195 %2 = load <4 x float>, <4 x float> *%a2, align 16
2196 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
2197 ret <4 x float> %3
2198}
2199
; Applies @llvm.x86.sse.sqrt.ps to %a0 and to a vector loaded from %a1
; (align 16) and returns the fadd of both results. Every CPU variant asserts
; one register-form sqrtps/vsqrtps and one that takes (%rdi) as a memory
; operand. The SLM variant accumulates into %xmm1 and therefore expects a
; trailing movaps copy back into %xmm0.
2200define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
2201; GENERIC-LABEL: test_sqrtps:
2202; GENERIC: # BB#0:
2203; GENERIC-NEXT: sqrtps %xmm0, %xmm1
2204; GENERIC-NEXT: sqrtps (%rdi), %xmm0
2205; GENERIC-NEXT: addps %xmm1, %xmm0
2206; GENERIC-NEXT: retq
2207;
2208; ATOM-LABEL: test_sqrtps:
2209; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002210; ATOM-NEXT: sqrtps %xmm0, %xmm1 # sched: [70:35.00]
2211; ATOM-NEXT: sqrtps (%rdi), %xmm0 # sched: [70:35.00]
2212; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2213; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002214;
2215; SLM-LABEL: test_sqrtps:
2216; SLM: # BB#0:
2217; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [18:1.00]
2218; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [15:1.00]
2219; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2220; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2221; SLM-NEXT: retq # sched: [4:1.00]
2222;
2223; SANDY-LABEL: test_sqrtps:
2224; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002225; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
2226; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002227; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002228; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002229;
2230; HASWELL-LABEL: test_sqrtps:
2231; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00002232; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
2233; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002234; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002235; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002236;
2237; BTVER2-LABEL: test_sqrtps:
2238; BTVER2: # BB#0:
2239; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
2240; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00]
2241; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2242; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002243;
2244; ZNVER1-LABEL: test_sqrtps:
2245; ZNVER1: # BB#0:
2246; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:1.00]
2247; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:1.00]
2248; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2249; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002250 %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
2251 %2 = load <4 x float>, <4 x float> *%a1, align 16
2252 %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2)
2253 %4 = fadd <4 x float> %1, %3
2254 ret <4 x float> %4
2255}
; Declaration of the SSE intrinsic called twice by test_sqrtps above.
2256declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
2257
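2258; TODO - sqrtss_m (test_sqrtss below only yields the register form; its load is emitted as a separate movaps/vmovaps)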
2259
; Applies @llvm.x86.sse.sqrt.ss to %a0 and to a full <4 x float> loaded from
; %a1 (align 16) and returns the fadd of both results. Every CPU variant
; asserts a separate movaps/vmovaps load followed by register-form
; sqrtss/vsqrtss instructions.
; NOTE(review): the Sandy Bridge variant records a latency of 114 for vsqrtss,
; far above the 14 recorded for vsqrtps in test_sqrtps - presumably a quirk of
; the scheduling model at the time these assertions were generated; confirm.
2260define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
2261; GENERIC-LABEL: test_sqrtss:
2262; GENERIC: # BB#0:
2263; GENERIC-NEXT: sqrtss %xmm0, %xmm0
2264; GENERIC-NEXT: movaps (%rdi), %xmm1
2265; GENERIC-NEXT: sqrtss %xmm1, %xmm1
2266; GENERIC-NEXT: addps %xmm1, %xmm0
2267; GENERIC-NEXT: retq
2268;
2269; ATOM-LABEL: test_sqrtss:
2270; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002271; ATOM-NEXT: movaps (%rdi), %xmm1 # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002272; ATOM-NEXT: sqrtss %xmm0, %xmm0
2273; ATOM-NEXT: sqrtss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002274; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2275; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002276;
2277; SLM-LABEL: test_sqrtss:
2278; SLM: # BB#0:
2279; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00]
2280; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [18:1.00]
2281; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [18:1.00]
2282; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2283; SLM-NEXT: retq # sched: [4:1.00]
2284;
2285; SANDY-LABEL: test_sqrtss:
2286; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002287; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
2288; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
2289; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002290; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002291; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002292;
2293; HASWELL-LABEL: test_sqrtss:
2294; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00002295; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
2296; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50]
2297; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002298; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002299; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002300;
2301; BTVER2-LABEL: test_sqrtss:
2302; BTVER2: # BB#0:
2303; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
2304; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [26:21.00]
2305; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [26:21.00]
2306; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2307; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002308;
2309; ZNVER1-LABEL: test_sqrtss:
2310; ZNVER1: # BB#0:
2311; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50]
2312; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [27:1.00]
2313; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [27:1.00]
2314; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2315; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002316 %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
2317 %2 = load <4 x float>, <4 x float> *%a1, align 16
2318 %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2)
2319 %4 = fadd <4 x float> %1, %3
2320 ret <4 x float> %4
2321}
; Declaration of the SSE intrinsic called twice by test_sqrtss above.
2322declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
2323
; Allocates an i32 stack slot, passes its address (bitcast to i8*) to
; @llvm.x86.sse.stmxcsr, then loads the slot and returns the value. Every CPU
; variant asserts stmxcsr/vstmxcsr to a negative %rsp offset followed by a
; movl reload from a negative %rsp offset into %eax; the offsets themselves
; are matched by regex rather than pinned.
2324define i32 @test_stmxcsr() {
2325; GENERIC-LABEL: test_stmxcsr:
2326; GENERIC: # BB#0:
2327; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
2328; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax
2329; GENERIC-NEXT: retq
2330;
2331; ATOM-LABEL: test_stmxcsr:
2332; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002333; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [15:7.50]
2334; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:1.00]
2335; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002336;
2337; SLM-LABEL: test_stmxcsr:
2338; SLM: # BB#0:
2339; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
2340; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
2341; SLM-NEXT: retq # sched: [4:1.00]
2342;
2343; SANDY-LABEL: test_stmxcsr:
2344; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002345; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
2346; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
2347; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002348;
2349; HASWELL-LABEL: test_stmxcsr:
2350; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00002351; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
2352; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
2353; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002354;
2355; BTVER2-LABEL: test_stmxcsr:
2356; BTVER2: # BB#0:
2357; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
2358; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
2359; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002360;
2361; ZNVER1-LABEL: test_stmxcsr:
2362; ZNVER1: # BB#0:
2363; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:0.50]
2364; ZNVER1-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
2365; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002366 %1 = alloca i32, align 4
2367 %2 = bitcast i32* %1 to i8*
2368 call void @llvm.x86.sse.stmxcsr(i8* %2)
2369 %3 = load i32, i32* %1, align 4
2370 ret i32 %3
2371}
; Declaration of the SSE intrinsic called by test_stmxcsr above.
; NOTE(review): declared `readnone` even though test_stmxcsr reloads the slot
; whose address it passes in; presumably the attributes written here are not
; what governs the intrinsic - confirm.
2372declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone
2373
; Computes (%a0 - %a1) - load(%a2) on <4 x float> values using two fsub
; operations (the load is align 16). Every CPU variant asserts a register-form
; subps/vsubps followed by one that takes (%rdi) as a memory operand.
2374define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2375; GENERIC-LABEL: test_subps:
2376; GENERIC: # BB#0:
2377; GENERIC-NEXT: subps %xmm1, %xmm0
2378; GENERIC-NEXT: subps (%rdi), %xmm0
2379; GENERIC-NEXT: retq
2380;
2381; ATOM-LABEL: test_subps:
2382; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002383; ATOM-NEXT: subps %xmm1, %xmm0 # sched: [5:5.00]
2384; ATOM-NEXT: subps (%rdi), %xmm0 # sched: [5:5.00]
2385; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002386;
2387; SLM-LABEL: test_subps:
2388; SLM: # BB#0:
2389; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
2390; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00]
2391; SLM-NEXT: retq # sched: [4:1.00]
2392;
2393; SANDY-LABEL: test_subps:
2394; SANDY: # BB#0:
2395; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002396; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
2397; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002398;
2399; HASWELL-LABEL: test_subps:
2400; HASWELL: # BB#0:
2401; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002402; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2403; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002404;
2405; BTVER2-LABEL: test_subps:
2406; BTVER2: # BB#0:
2407; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2408; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
2409; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002410;
2411; ZNVER1-LABEL: test_subps:
2412; ZNVER1: # BB#0:
2413; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2414; ZNVER1-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
2415; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002416 %1 = fsub <4 x float> %a0, %a1
2417 %2 = load <4 x float>, <4 x float> *%a2, align 16
2418 %3 = fsub <4 x float> %1, %2
2419 ret <4 x float> %3
2420}
2421
; Scalar counterpart of test_subps: computes (%a0 - %a1) - load(%a2) on float
; values (the load is align 4). Every CPU variant asserts a register-form
; subss/vsubss followed by one that takes (%rdi) as a memory operand.
2422define float @test_subss(float %a0, float %a1, float *%a2) {
2423; GENERIC-LABEL: test_subss:
2424; GENERIC: # BB#0:
2425; GENERIC-NEXT: subss %xmm1, %xmm0
2426; GENERIC-NEXT: subss (%rdi), %xmm0
2427; GENERIC-NEXT: retq
2428;
2429; ATOM-LABEL: test_subss:
2430; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002431; ATOM-NEXT: subss %xmm1, %xmm0 # sched: [5:5.00]
2432; ATOM-NEXT: subss (%rdi), %xmm0 # sched: [5:5.00]
2433; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002434;
2435; SLM-LABEL: test_subss:
2436; SLM: # BB#0:
2437; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
2438; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00]
2439; SLM-NEXT: retq # sched: [4:1.00]
2440;
2441; SANDY-LABEL: test_subss:
2442; SANDY: # BB#0:
2443; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002444; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
2445; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002446;
2447; HASWELL-LABEL: test_subss:
2448; HASWELL: # BB#0:
2449; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002450; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2451; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002452;
2453; BTVER2-LABEL: test_subss:
2454; BTVER2: # BB#0:
2455; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2456; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
2457; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002458;
2459; ZNVER1-LABEL: test_subss:
2460; ZNVER1: # BB#0:
2461; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2462; ZNVER1-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
2463; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002464 %1 = fsub float %a0, %a1
2465 %2 = load float, float *%a2, align 4
2466 %3 = fsub float %1, %2
2467 ret float %3
2468}
2469
; Calls @llvm.x86.sse.ucomieq.ss on (%a0, %a1) and on (%a0, load %a2) and
; returns the bitwise OR of the two i32 results. Every CPU variant asserts, for
; each compare, ucomiss/vucomiss followed by setnp + sete + andb; the two
; partial results are then merged with orb and widened with movzbl. The second
; compare takes (%rdi) as a memory operand.
; NOTE(review): the vector load of %a2 uses align 4, unlike the align 16
; vector loads in the neighbouring tests - presumably deliberate; confirm.
2470define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2471; GENERIC-LABEL: test_ucomiss:
2472; GENERIC: # BB#0:
2473; GENERIC-NEXT: ucomiss %xmm1, %xmm0
2474; GENERIC-NEXT: setnp %al
2475; GENERIC-NEXT: sete %cl
2476; GENERIC-NEXT: andb %al, %cl
2477; GENERIC-NEXT: ucomiss (%rdi), %xmm0
2478; GENERIC-NEXT: setnp %al
2479; GENERIC-NEXT: sete %dl
2480; GENERIC-NEXT: andb %al, %dl
2481; GENERIC-NEXT: orb %cl, %dl
2482; GENERIC-NEXT: movzbl %dl, %eax
2483; GENERIC-NEXT: retq
2484;
2485; ATOM-LABEL: test_ucomiss:
2486; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002487; ATOM-NEXT: ucomiss %xmm1, %xmm0 # sched: [9:4.50]
2488; ATOM-NEXT: setnp %al # sched: [1:0.50]
2489; ATOM-NEXT: sete %cl # sched: [1:0.50]
2490; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
2491; ATOM-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:5.00]
2492; ATOM-NEXT: setnp %al # sched: [1:0.50]
2493; ATOM-NEXT: sete %dl # sched: [1:0.50]
2494; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
2495; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
2496; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
2497; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002498;
2499; SLM-LABEL: test_ucomiss:
2500; SLM: # BB#0:
2501; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
2502; SLM-NEXT: setnp %al # sched: [1:0.50]
2503; SLM-NEXT: sete %cl # sched: [1:0.50]
2504; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
2505; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00]
2506; SLM-NEXT: setnp %al # sched: [1:0.50]
2507; SLM-NEXT: sete %dl # sched: [1:0.50]
2508; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
2509; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
2510; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
2511; SLM-NEXT: retq # sched: [4:1.00]
2512;
2513; SANDY-LABEL: test_ucomiss:
2514; SANDY: # BB#0:
2515; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002516; SANDY-NEXT: setnp %al # sched: [1:1.00]
2517; SANDY-NEXT: sete %cl # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002518; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
2519; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002520; SANDY-NEXT: setnp %al # sched: [1:1.00]
2521; SANDY-NEXT: sete %dl # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002522; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
2523; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
2524; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002525; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002526;
2527; HASWELL-LABEL: test_ucomiss:
2528; HASWELL: # BB#0:
2529; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002530; HASWELL-NEXT: setnp %al # sched: [1:0.50]
2531; HASWELL-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002532; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
2533; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002534; HASWELL-NEXT: setnp %al # sched: [1:0.50]
2535; HASWELL-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002536; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
2537; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
2538; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002539; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002540;
2541; BTVER2-LABEL: test_ucomiss:
2542; BTVER2: # BB#0:
2543; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
2544; BTVER2-NEXT: setnp %al # sched: [1:0.50]
2545; BTVER2-NEXT: sete %cl # sched: [1:0.50]
2546; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
2547; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
2548; BTVER2-NEXT: setnp %al # sched: [1:0.50]
2549; BTVER2-NEXT: sete %dl # sched: [1:0.50]
2550; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
2551; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
2552; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
2553; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002554;
2555; ZNVER1-LABEL: test_ucomiss:
2556; ZNVER1: # BB#0:
2557; ZNVER1-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
2558; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
2559; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
2560; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
2561; ZNVER1-NEXT: vucomiss (%rdi), %xmm0 # sched: [10:1.00]
2562; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
2563; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
2564; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
2565; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
2566; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
2567; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002568 %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
2569 %2 = load <4 x float>, <4 x float> *%a2, align 4
2570 %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
2571 %4 = or i32 %1, %3
2572 ret i32 %4
2573}
; Declaration of the SSE intrinsic called twice by test_ucomiss above.
2574declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
2575
; Two chained shufflevector operations with mask <2,6,3,7>: first on %a0/%a1,
; then on that result and a vector loaded from %a2 (align 16). Every CPU
; variant asserts a register-form unpckhps/vunpckhps followed by one that
; takes the load as a memory operand; the Atom variant also expects four nop
; instructions before retq.
2576define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2577; GENERIC-LABEL: test_unpckhps:
2578; GENERIC: # BB#0:
2579; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2580; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
2581; GENERIC-NEXT: retq
2582;
2583; ATOM-LABEL: test_unpckhps:
2584; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002585; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
2586; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
2587; ATOM-NEXT: nop # sched: [1:0.50]
2588; ATOM-NEXT: nop # sched: [1:0.50]
2589; ATOM-NEXT: nop # sched: [1:0.50]
2590; ATOM-NEXT: nop # sched: [1:0.50]
2591; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002592;
2593; SLM-LABEL: test_unpckhps:
2594; SLM: # BB#0:
2595; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
2596; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
2597; SLM-NEXT: retq # sched: [4:1.00]
2598;
2599; SANDY-LABEL: test_unpckhps:
2600; SANDY: # BB#0:
2601; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002602; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
2603; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002604;
2605; HASWELL-LABEL: test_unpckhps:
2606; HASWELL: # BB#0:
2607; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002608; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
2609; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002610;
2611; BTVER2-LABEL: test_unpckhps:
2612; BTVER2: # BB#0:
2613; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
2614; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
2615; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002616;
2617; ZNVER1-LABEL: test_unpckhps:
2618; ZNVER1: # BB#0:
2619; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
2620; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
2621; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002622 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
2623 %2 = load <4 x float>, <4 x float> *%a2, align 16
2624 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
2625 ret <4 x float> %3
2626}
2627
; Two chained shufflevector operations with mask <0,4,1,5>: first on %a0/%a1,
; then on that result and a vector loaded from %a2 (align 16). Every CPU
; variant asserts a register-form unpcklps/vunpcklps followed by one that
; takes the load as a memory operand; the Atom variant also expects four nop
; instructions before retq.
2628define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2629; GENERIC-LABEL: test_unpcklps:
2630; GENERIC: # BB#0:
2631; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2632; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
2633; GENERIC-NEXT: retq
2634;
2635; ATOM-LABEL: test_unpcklps:
2636; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002637; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
2638; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
2639; ATOM-NEXT: nop # sched: [1:0.50]
2640; ATOM-NEXT: nop # sched: [1:0.50]
2641; ATOM-NEXT: nop # sched: [1:0.50]
2642; ATOM-NEXT: nop # sched: [1:0.50]
2643; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002644;
2645; SLM-LABEL: test_unpcklps:
2646; SLM: # BB#0:
2647; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
2648; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [4:1.00]
2649; SLM-NEXT: retq # sched: [4:1.00]
2650;
2651; SANDY-LABEL: test_unpcklps:
2652; SANDY: # BB#0:
2653; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002654; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
2655; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002656;
2657; HASWELL-LABEL: test_unpcklps:
2658; HASWELL: # BB#0:
2659; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002660; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
2661; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002662;
2663; BTVER2-LABEL: test_unpcklps:
2664; BTVER2: # BB#0:
2665; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
2666; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
2667; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002668;
2669; ZNVER1-LABEL: test_unpcklps:
2670; ZNVER1: # BB#0:
2671; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
2672; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:0.50]
2673; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002674 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
2675 %2 = load <4 x float>, <4 x float> *%a2, align 16
2676 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
2677 ret <4 x float> %3
2678}
2679
; Bitcasts %a0 and %a1 to <4 x i32>, xors them, xors the result with a vector
; loaded from %a2 (align 16, also bitcast to <4 x i32>), and bitcasts the
; result back to <4 x float>. Every CPU variant asserts a register-form
; xorps/vxorps followed by one that takes (%rdi) as a memory operand. In the
; Atom variant the two xorps lines carry no sched annotation and eight nop
; instructions are expected before retq.
2680define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2681; GENERIC-LABEL: test_xorps:
2682; GENERIC: # BB#0:
2683; GENERIC-NEXT: xorps %xmm1, %xmm0
2684; GENERIC-NEXT: xorps (%rdi), %xmm0
2685; GENERIC-NEXT: retq
2686;
2687; ATOM-LABEL: test_xorps:
2688; ATOM: # BB#0:
2689; ATOM-NEXT: xorps %xmm1, %xmm0
2690; ATOM-NEXT: xorps (%rdi), %xmm0
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002691; ATOM-NEXT: nop # sched: [1:0.50]
2692; ATOM-NEXT: nop # sched: [1:0.50]
2693; ATOM-NEXT: nop # sched: [1:0.50]
2694; ATOM-NEXT: nop # sched: [1:0.50]
2695; ATOM-NEXT: nop # sched: [1:0.50]
2696; ATOM-NEXT: nop # sched: [1:0.50]
2697; ATOM-NEXT: nop # sched: [1:0.50]
2698; ATOM-NEXT: nop # sched: [1:0.50]
2699; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002700;
2701; SLM-LABEL: test_xorps:
2702; SLM: # BB#0:
2703; SLM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
2704; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00]
2705; SLM-NEXT: retq # sched: [4:1.00]
2706;
2707; SANDY-LABEL: test_xorps:
2708; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002709; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
2710; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2711; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002712;
2713; HASWELL-LABEL: test_xorps:
2714; HASWELL: # BB#0:
2715; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002716; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
2717; HASWELL-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002718;
2719; BTVER2-LABEL: test_xorps:
2720; BTVER2: # BB#0:
2721; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2722; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
2723; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002724;
2725; ZNVER1-LABEL: test_xorps:
2726; ZNVER1: # BB#0:
2727; ZNVER1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2728; ZNVER1-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
2729; ZNVER1-NEXT: retq # sched: [5:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002730 %1 = bitcast <4 x float> %a0 to <4 x i32>
2731 %2 = bitcast <4 x float> %a1 to <4 x i32>
2732 %3 = xor <4 x i32> %1, %2
2733 %4 = load <4 x float>, <4 x float> *%a2, align 16
2734 %5 = bitcast <4 x float> %4 to <4 x i32>
2735 %6 = xor <4 x i32> %3, %5
2736 %7 = bitcast <4 x i32> %6 to <4 x float>
2737 ret <4 x float> %7
2738}
2739
; Metadata node !0 wrapping the constant i32 1. None of the functions from
; test_rsqrtss through test_xorps reference it.
; NOTE(review): presumably consumed by an earlier test in this file - confirm.
2740!0 = !{i32 1}