blob: e4ead498782b556ade5f5e9b661c38b9c7a3201b [file] [log] [blame]
Simon Pilgrim93986492017-04-18 19:04:40 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
Simon Pilgrim84846982017-08-01 15:14:35 +00002; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
Simon Pilgrim93986492017-04-18 19:04:40 +00003; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
Gadi Haber767d98b2017-08-30 08:08:50 +00008; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
Simon Pilgrim93986492017-04-18 19:04:40 +00009; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
Craig Topper106b5b62017-07-19 02:45:14 +000010; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
Simon Pilgrim93986492017-04-18 19:04:40 +000011
; Scheduling-annotation test for packed single-precision add.
; The IR adds %a0 and %a1 (register form), then adds a 16-byte-aligned vector
; loaded through %a2 (memory-operand form), so every per-CPU assertion block
; below pins one reg-reg add, one reg-mem add and the return.
; The "# sched: [a:b]" suffixes are produced by the -print-schedule option on
; the RUN lines; presumably a is latency and b is reciprocal throughput --
; TODO confirm against the llc documentation.
; The assertion lines are autogenerated (see the NOTE line at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
12define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
13; GENERIC-LABEL: test_addps:
14; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +000015; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
16; GENERIC-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
17; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000018;
19; ATOM-LABEL: test_addps:
20; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +000021; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
22; ATOM-NEXT: addps (%rdi), %xmm0 # sched: [5:5.00]
23; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +000024;
25; SLM-LABEL: test_addps:
26; SLM: # BB#0:
27; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
28; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00]
29; SLM-NEXT: retq # sched: [4:1.00]
30;
31; SANDY-LABEL: test_addps:
32; SANDY: # BB#0:
33; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +000034; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
35; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000036;
37; HASWELL-LABEL: test_addps:
38; HASWELL: # BB#0:
39; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +000040; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
41; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000042;
Gadi Haber767d98b2017-08-30 08:08:50 +000043; SKYLAKE-LABEL: test_addps:
44; SKYLAKE: # BB#0:
45; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
46; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
47; SKYLAKE-NEXT: retq # sched: [2:1.00]
48;
Simon Pilgrim93986492017-04-18 19:04:40 +000049; BTVER2-LABEL: test_addps:
50; BTVER2: # BB#0:
51; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
52; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
53; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +000054;
55; ZNVER1-LABEL: test_addps:
56; ZNVER1: # BB#0:
57; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
58; ZNVER1-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +000059; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +000060 %1 = fadd <4 x float> %a0, %a1
61 %2 = load <4 x float>, <4 x float> *%a2, align 16
62 %3 = fadd <4 x float> %1, %2
63 ret <4 x float> %3
64}
65
; Scheduling-annotation test for scalar single-precision add.
; The IR adds the two float arguments (register form), then adds a float
; loaded through %a2 with 4-byte alignment (memory-operand form); each per-CPU
; assertion block below pins the resulting reg-reg add, reg-mem add and return
; together with their "# sched" annotations.
; Assertion lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
66define float @test_addss(float %a0, float %a1, float *%a2) {
67; GENERIC-LABEL: test_addss:
68; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +000069; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
70; GENERIC-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
71; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000072;
73; ATOM-LABEL: test_addss:
74; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +000075; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
76; ATOM-NEXT: addss (%rdi), %xmm0 # sched: [5:5.00]
77; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +000078;
79; SLM-LABEL: test_addss:
80; SLM: # BB#0:
81; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
82; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00]
83; SLM-NEXT: retq # sched: [4:1.00]
84;
85; SANDY-LABEL: test_addss:
86; SANDY: # BB#0:
87; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +000088; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
89; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000090;
91; HASWELL-LABEL: test_addss:
92; HASWELL: # BB#0:
93; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +000094; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
95; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000096;
Gadi Haber767d98b2017-08-30 08:08:50 +000097; SKYLAKE-LABEL: test_addss:
98; SKYLAKE: # BB#0:
99; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
100; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
101; SKYLAKE-NEXT: retq # sched: [2:1.00]
102;
Simon Pilgrim93986492017-04-18 19:04:40 +0000103; BTVER2-LABEL: test_addss:
104; BTVER2: # BB#0:
105; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
106; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
107; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000108;
109; ZNVER1-LABEL: test_addss:
110; ZNVER1: # BB#0:
111; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
112; ZNVER1-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000113; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000114 %1 = fadd float %a0, %a1
115 %2 = load float, float *%a2, align 4
116 %3 = fadd float %1, %2
117 ret float %3
118}
119
; Scheduling-annotation test for packed single-precision bitwise AND.
; IR has no bitwise ops on float vectors, so the arguments are bitcast to
; <4 x i32>, ANDed together (register form), ANDed with a 16-byte-aligned
; vector loaded through %a2 (memory-operand form), and bitcast back.
; Each per-CPU assertion block pins the reg-reg and reg-mem (v)andps plus the
; return; the Atom block additionally expects four nop instructions ahead of
; the return.
; Assertion lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
120define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
121; GENERIC-LABEL: test_andps:
122; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000123; GENERIC-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
124; GENERIC-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
125; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000126;
127; ATOM-LABEL: test_andps:
128; ATOM: # BB#0:
Simon Pilgrim486072d2017-08-01 17:51:20 +0000129; ATOM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
130; ATOM-NEXT: andps (%rdi), %xmm0 # sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000131; ATOM-NEXT: nop # sched: [1:0.50]
132; ATOM-NEXT: nop # sched: [1:0.50]
133; ATOM-NEXT: nop # sched: [1:0.50]
134; ATOM-NEXT: nop # sched: [1:0.50]
135; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000136;
137; SLM-LABEL: test_andps:
138; SLM: # BB#0:
139; SLM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
140; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00]
141; SLM-NEXT: retq # sched: [4:1.00]
142;
143; SANDY-LABEL: test_andps:
144; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000145; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
146; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
147; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000148;
149; HASWELL-LABEL: test_andps:
150; HASWELL: # BB#0:
151; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000152; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
153; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000154;
Gadi Haber767d98b2017-08-30 08:08:50 +0000155; SKYLAKE-LABEL: test_andps:
156; SKYLAKE: # BB#0:
157; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
158; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
159; SKYLAKE-NEXT: retq # sched: [2:1.00]
160;
Simon Pilgrim93986492017-04-18 19:04:40 +0000161; BTVER2-LABEL: test_andps:
162; BTVER2: # BB#0:
163; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
164; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
165; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000166;
167; ZNVER1-LABEL: test_andps:
168; ZNVER1: # BB#0:
169; ZNVER1-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
170; ZNVER1-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000171; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000172 %1 = bitcast <4 x float> %a0 to <4 x i32>
173 %2 = bitcast <4 x float> %a1 to <4 x i32>
174 %3 = and <4 x i32> %1, %2
175 %4 = load <4 x float>, <4 x float> *%a2, align 16
176 %5 = bitcast <4 x float> %4 to <4 x i32>
177 %6 = and <4 x i32> %3, %5
178 %7 = bitcast <4 x i32> %6 to <4 x float>
179 ret <4 x float> %7
180}
181
; Scheduling-annotation test for packed single-precision AND-NOT.
; The IR expresses each and-not as "xor with all-ones, then and" on the
; <4 x i32> bitcasts: first (~%a0 & %a1) in register form, then the complement
; of that result ANDed with a 16-byte-aligned vector loaded through %a2
; (memory-operand form). Each per-CPU assertion block pins the reg-reg and
; reg-mem (v)andnps plus the return; the Atom block additionally expects four
; nop instructions ahead of the return.
; Assertion lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
182define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
183; GENERIC-LABEL: test_andnotps:
184; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000185; GENERIC-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
186; GENERIC-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
187; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000188;
189; ATOM-LABEL: test_andnotps:
190; ATOM: # BB#0:
Simon Pilgrim486072d2017-08-01 17:51:20 +0000191; ATOM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
192; ATOM-NEXT: andnps (%rdi), %xmm0 # sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000193; ATOM-NEXT: nop # sched: [1:0.50]
194; ATOM-NEXT: nop # sched: [1:0.50]
195; ATOM-NEXT: nop # sched: [1:0.50]
196; ATOM-NEXT: nop # sched: [1:0.50]
197; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000198;
199; SLM-LABEL: test_andnotps:
200; SLM: # BB#0:
201; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
202; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00]
203; SLM-NEXT: retq # sched: [4:1.00]
204;
205; SANDY-LABEL: test_andnotps:
206; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000207; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
208; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
209; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000210;
211; HASWELL-LABEL: test_andnotps:
212; HASWELL: # BB#0:
213; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000214; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
215; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000216;
Gadi Haber767d98b2017-08-30 08:08:50 +0000217; SKYLAKE-LABEL: test_andnotps:
218; SKYLAKE: # BB#0:
219; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
220; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
221; SKYLAKE-NEXT: retq # sched: [2:1.00]
222;
Simon Pilgrim93986492017-04-18 19:04:40 +0000223; BTVER2-LABEL: test_andnotps:
224; BTVER2: # BB#0:
225; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
226; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
227; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000228;
229; ZNVER1-LABEL: test_andnotps:
230; ZNVER1: # BB#0:
231; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
232; ZNVER1-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000233; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000234 %1 = bitcast <4 x float> %a0 to <4 x i32>
235 %2 = bitcast <4 x float> %a1 to <4 x i32>
236 %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
237 %4 = and <4 x i32> %3, %2
238 %5 = load <4 x float>, <4 x float> *%a2, align 16
239 %6 = bitcast <4 x float> %5 to <4 x i32>
240 %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
241 %8 = and <4 x i32> %6, %7
242 %9 = bitcast <4 x i32> %8 to <4 x float>
243 ret <4 x float> %9
244}
245
; Scheduling-annotation test for packed single-precision compare.
; The IR does two ordered-equal (oeq) vector compares of %a0: one against %a1
; (register form) and one against a 16-byte-aligned vector loaded through %a2
; (memory-operand form). The two <4 x i1> results are ORed, sign-extended to
; <4 x i32> and bitcast to <4 x float> so a vector value is returned.
; Each per-CPU assertion block therefore pins two (v)cmpeqps, one (v)orps that
; merges the masks, and the return.
; Assertion lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
246define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
247; GENERIC-LABEL: test_cmpps:
248; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000249; GENERIC-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
250; GENERIC-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
251; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
252; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000253;
254; ATOM-LABEL: test_cmpps:
255; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000256; ATOM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [5:5.00]
257; ATOM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [5:5.00]
Simon Pilgrim486072d2017-08-01 17:51:20 +0000258; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000259; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000260;
261; SLM-LABEL: test_cmpps:
262; SLM: # BB#0:
263; SLM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
264; SLM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
265; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
266; SLM-NEXT: retq # sched: [4:1.00]
267;
268; SANDY-LABEL: test_cmpps:
269; SANDY: # BB#0:
270; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000271; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
272; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
273; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000274;
275; HASWELL-LABEL: test_cmpps:
276; HASWELL: # BB#0:
277; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000278; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000279; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000280; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000281;
Gadi Haber767d98b2017-08-30 08:08:50 +0000282; SKYLAKE-LABEL: test_cmpps:
283; SKYLAKE: # BB#0:
284; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
285; SKYLAKE-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
286; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
287; SKYLAKE-NEXT: retq # sched: [2:1.00]
288;
Simon Pilgrim93986492017-04-18 19:04:40 +0000289; BTVER2-LABEL: test_cmpps:
290; BTVER2: # BB#0:
291; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
292; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
293; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
294; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000295;
296; ZNVER1-LABEL: test_cmpps:
297; ZNVER1: # BB#0:
298; ZNVER1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
299; ZNVER1-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
300; ZNVER1-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000301; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000302 %1 = fcmp oeq <4 x float> %a0, %a1
303 %2 = load <4 x float>, <4 x float> *%a2, align 16
304 %3 = fcmp oeq <4 x float> %a0, %2
305 %4 = or <4 x i1> %1, %3
306 %5 = sext <4 x i1> %4 to <4 x i32>
307 %6 = bitcast <4 x i32> %5 to <4 x float>
308 ret <4 x float> %6
309}
310
; Scheduling-annotation test for scalar single-precision compare.
; The scalar arguments are inserted into lane 0 of undef vectors because the
; @llvm.x86.sse.cmp.ss intrinsic (declared right after this function) takes
; <4 x float> operands. It is called twice with predicate immediate 0, which
; the assertion blocks show being emitted as (v)cmpeqss: once on the two
; register operands, and once on the first result and a float loaded through
; %a2 (memory-operand form). Lane 0 of the final result is returned.
; Assertion lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
311define float @test_cmpss(float %a0, float %a1, float *%a2) {
312; GENERIC-LABEL: test_cmpss:
313; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000314; GENERIC-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
315; GENERIC-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
316; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000317;
318; ATOM-LABEL: test_cmpss:
319; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000320; ATOM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [5:5.00]
321; ATOM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [5:5.00]
322; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000323;
324; SLM-LABEL: test_cmpss:
325; SLM: # BB#0:
326; SLM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
327; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
328; SLM-NEXT: retq # sched: [4:1.00]
329;
330; SANDY-LABEL: test_cmpss:
331; SANDY: # BB#0:
332; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
333; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000334; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000335;
336; HASWELL-LABEL: test_cmpss:
337; HASWELL: # BB#0:
338; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
339; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000340; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000341;
Gadi Haber767d98b2017-08-30 08:08:50 +0000342; SKYLAKE-LABEL: test_cmpss:
343; SKYLAKE: # BB#0:
344; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
345; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
346; SKYLAKE-NEXT: retq # sched: [2:1.00]
347;
Simon Pilgrim93986492017-04-18 19:04:40 +0000348; BTVER2-LABEL: test_cmpss:
349; BTVER2: # BB#0:
350; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
351; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
352; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000353;
354; ZNVER1-LABEL: test_cmpss:
355; ZNVER1: # BB#0:
356; ZNVER1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
357; ZNVER1-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000358; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000359 %1 = insertelement <4 x float> undef, float %a0, i32 0
360 %2 = insertelement <4 x float> undef, float %a1, i32 0
361 %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0)
362 %4 = load float, float *%a2, align 4
363 %5 = insertelement <4 x float> undef, float %4, i32 0
364 %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0)
365 %7 = extractelement <4 x float> %6, i32 0
366 ret float %7
367}
368declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
369
; Scheduling-annotation test for the scalar ordered compare that sets EFLAGS.
; The @llvm.x86.sse.comieq.ss intrinsic (declared right after this function)
; is called twice on %a0: once against %a1 (register form) and once against a
; vector loaded through %a2 (memory-operand form); the two i32 results are
; ORed and returned.
; Each per-CPU assertion block pins, for both compares, the (v)comiss followed
; by a setnp/sete/andb sequence that materialises the result from the flags,
; then the orb/movzbl that combine and widen it, and the return.
; NOTE(review): the <4 x float> load uses "align 4" rather than 16; presumably
; deliberate because only the low scalar is compared -- confirm before
; changing, as the alignment may affect whether the load is folded.
; Assertion lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
370define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
371; GENERIC-LABEL: test_comiss:
372; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000373; GENERIC-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +0000374; GENERIC-NEXT: setnp %al # sched: [1:0.50]
375; GENERIC-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim84846982017-08-01 15:14:35 +0000376; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
377; GENERIC-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +0000378; GENERIC-NEXT: setnp %al # sched: [1:0.50]
379; GENERIC-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim84846982017-08-01 15:14:35 +0000380; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
381; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
382; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
383; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000384;
385; ATOM-LABEL: test_comiss:
386; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000387; ATOM-NEXT: comiss %xmm1, %xmm0 # sched: [9:4.50]
388; ATOM-NEXT: setnp %al # sched: [1:0.50]
389; ATOM-NEXT: sete %cl # sched: [1:0.50]
390; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
391; ATOM-NEXT: comiss (%rdi), %xmm0 # sched: [10:5.00]
392; ATOM-NEXT: setnp %al # sched: [1:0.50]
393; ATOM-NEXT: sete %dl # sched: [1:0.50]
394; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
395; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
396; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
397; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000398;
399; SLM-LABEL: test_comiss:
400; SLM: # BB#0:
401; SLM-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
402; SLM-NEXT: setnp %al # sched: [1:0.50]
403; SLM-NEXT: sete %cl # sched: [1:0.50]
404; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
405; SLM-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00]
406; SLM-NEXT: setnp %al # sched: [1:0.50]
407; SLM-NEXT: sete %dl # sched: [1:0.50]
408; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
409; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
410; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
411; SLM-NEXT: retq # sched: [4:1.00]
412;
413; SANDY-LABEL: test_comiss:
414; SANDY: # BB#0:
415; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +0000416; SANDY-NEXT: setnp %al # sched: [1:0.50]
417; SANDY-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000418; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
419; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +0000420; SANDY-NEXT: setnp %al # sched: [1:0.50]
421; SANDY-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000422; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
423; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
424; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000425; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000426;
427; HASWELL-LABEL: test_comiss:
428; HASWELL: # BB#0:
429; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000430; HASWELL-NEXT: setnp %al # sched: [1:0.50]
431; HASWELL-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000432; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
433; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000434; HASWELL-NEXT: setnp %al # sched: [1:0.50]
435; HASWELL-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000436; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
437; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
438; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000439; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000440;
Gadi Haber767d98b2017-08-30 08:08:50 +0000441; SKYLAKE-LABEL: test_comiss:
442; SKYLAKE: # BB#0:
443; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
444; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
445; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
446; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
447; SKYLAKE-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
448; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
449; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
450; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
451; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
452; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
453; SKYLAKE-NEXT: retq # sched: [2:1.00]
454;
Simon Pilgrim93986492017-04-18 19:04:40 +0000455; BTVER2-LABEL: test_comiss:
456; BTVER2: # BB#0:
457; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
458; BTVER2-NEXT: setnp %al # sched: [1:0.50]
459; BTVER2-NEXT: sete %cl # sched: [1:0.50]
460; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
461; BTVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
462; BTVER2-NEXT: setnp %al # sched: [1:0.50]
463; BTVER2-NEXT: sete %dl # sched: [1:0.50]
464; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
465; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
466; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
467; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000468;
469; ZNVER1-LABEL: test_comiss:
470; ZNVER1: # BB#0:
471; ZNVER1-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
472; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
473; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
474; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
475; ZNVER1-NEXT: vcomiss (%rdi), %xmm0 # sched: [10:1.00]
476; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
477; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
478; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
479; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
480; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000481; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000482 %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
483 %2 = load <4 x float>, <4 x float> *%a2, align 4
484 %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2)
485 %4 = or i32 %1, %3
486 ret i32 %4
487}
488declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
489
; Scheduling-annotation test for 32-bit signed integer to float conversion.
; The IR converts the i32 argument (register form) and an i32 loaded through
; %a1 (memory-operand form) with sitofp, then adds the two floats so both
; conversions stay live. Each per-CPU assertion block pins the two
; (v)cvtsi2ssl, the scalar add and the return.
; The Atom and Silvermont blocks expect the memory-operand conversion first,
; while the other blocks expect the register conversion first.
; Assertion lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
490define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
491; GENERIC-LABEL: test_cvtsi2ss:
492; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000493; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
494; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
495; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
496; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000497;
498; ATOM-LABEL: test_cvtsi2ss:
499; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000500; ATOM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:3.50]
501; ATOM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [6:3.00]
502; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
503; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000504;
505; SLM-LABEL: test_cvtsi2ss:
506; SLM: # BB#0:
507; SLM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00]
508; SLM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:0.50]
509; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
510; SLM-NEXT: retq # sched: [4:1.00]
511;
512; SANDY-LABEL: test_cvtsi2ss:
513; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000514; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
515; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000516; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000517; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000518;
519; HASWELL-LABEL: test_cvtsi2ss:
520; HASWELL: # BB#0:
521; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
522; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
523; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000524; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000525;
Gadi Haber767d98b2017-08-30 08:08:50 +0000526; SKYLAKE-LABEL: test_cvtsi2ss:
527; SKYLAKE: # BB#0:
528; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
529; SKYLAKE-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
530; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
531; SKYLAKE-NEXT: retq # sched: [2:1.00]
532;
Simon Pilgrim93986492017-04-18 19:04:40 +0000533; BTVER2-LABEL: test_cvtsi2ss:
534; BTVER2: # BB#0:
535; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
536; BTVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
537; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
538; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000539;
540; ZNVER1-LABEL: test_cvtsi2ss:
541; ZNVER1: # BB#0:
542; ZNVER1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
543; ZNVER1-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
544; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000545; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000546 %1 = sitofp i32 %a0 to float
547 %2 = load i32, i32 *%a1, align 4
548 %3 = sitofp i32 %2 to float
549 %4 = fadd float %1, %3
550 ret float %4
551}
552
; Scheduling-annotation test for 64-bit signed integer to float conversion.
; Same shape as the 32-bit variant but on i64: the argument is converted in
; register form, an i64 loaded through %a1 (8-byte aligned) is converted in
; memory-operand form, and the two floats are added so both conversions stay
; live. Each per-CPU assertion block pins the two (v)cvtsi2ssq, the scalar add
; and the return.
; The Atom and Silvermont blocks expect the memory-operand conversion first,
; while the other blocks expect the register conversion first.
; Assertion lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
553define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
554; GENERIC-LABEL: test_cvtsi2ssq:
555; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000556; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
557; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
558; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
559; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000560;
561; ATOM-LABEL: test_cvtsi2ssq:
562; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000563; ATOM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:3.50]
564; ATOM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:3.00]
565; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
566; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000567;
568; SLM-LABEL: test_cvtsi2ssq:
569; SLM: # BB#0:
570; SLM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00]
571; SLM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50]
572; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
573; SLM-NEXT: retq # sched: [4:1.00]
574;
575; SANDY-LABEL: test_cvtsi2ssq:
576; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000577; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
578; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000579; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000580; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000581;
582; HASWELL-LABEL: test_cvtsi2ssq:
583; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +0000584; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000585; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
586; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000587; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000588;
Gadi Haber767d98b2017-08-30 08:08:50 +0000589; SKYLAKE-LABEL: test_cvtsi2ssq:
590; SKYLAKE: # BB#0:
591; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
592; SKYLAKE-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
593; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
594; SKYLAKE-NEXT: retq # sched: [2:1.00]
595;
Simon Pilgrim93986492017-04-18 19:04:40 +0000596; BTVER2-LABEL: test_cvtsi2ssq:
597; BTVER2: # BB#0:
598; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
599; BTVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
600; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
601; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000602;
603; ZNVER1-LABEL: test_cvtsi2ssq:
604; ZNVER1: # BB#0:
605; ZNVER1-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
606; ZNVER1-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
607; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000608; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000609 %1 = sitofp i64 %a0 to float
610 %2 = load i64, i64 *%a1, align 8
611 %3 = sitofp i64 %2 to float
612 %4 = fadd float %1, %3
613 ret float %4
614}
615
; Scheduling test for (V)CVTSS2SI with a 32-bit destination.
; @llvm.x86.sse.cvtss2si is called once on %a0 (register operand) and once on
; a float loaded from %a1 (memory operand); the two i32 results are added and
; returned. The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define i32 @test_cvtss2si(float %a0, float *%a1) {
; GENERIC-LABEL: test_cvtss2si:
; GENERIC: # BB#0:
; GENERIC-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
; GENERIC-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtss2si:
; ATOM: # BB#0:
; ATOM-NEXT: cvtss2si (%rdi), %eax # sched: [9:4.50]
; ATOM-NEXT: cvtss2si %xmm0, %ecx # sched: [8:4.00]
; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtss2si:
; SLM: # BB#0:
; SLM-NEXT: cvtss2si (%rdi), %eax # sched: [7:1.00]
; SLM-NEXT: cvtss2si %xmm0, %ecx # sched: [4:0.50]
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtss2si:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00]
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtss2si:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_cvtss2si:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
; SKYLAKE-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtss2si:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
; BTVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00]
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtss2si:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00]
; ZNVER1-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = insertelement <4 x float> undef, float %a0, i32 0
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1)
  %3 = load float, float *%a1, align 4
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4)
  %6 = add i32 %2, %5
  ret i32 %6
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; Scheduling test for (V)CVTSS2SI with a 64-bit destination.
; @llvm.x86.sse.cvtss2si64 is called once on %a0 (register operand) and once
; on a float loaded from %a1 (memory operand); the two i64 results are added
; and returned. The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define i64 @test_cvtss2siq(float %a0, float *%a1) {
; GENERIC-LABEL: test_cvtss2siq:
; GENERIC: # BB#0:
; GENERIC-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
; GENERIC-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtss2siq:
; ATOM: # BB#0:
; ATOM-NEXT: cvtss2si (%rdi), %rax # sched: [10:5.00]
; ATOM-NEXT: cvtss2si %xmm0, %rcx # sched: [9:4.50]
; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtss2siq:
; SLM: # BB#0:
; SLM-NEXT: cvtss2si (%rdi), %rax # sched: [7:1.00]
; SLM-NEXT: cvtss2si %xmm0, %rcx # sched: [4:0.50]
; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtss2siq:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00]
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtss2siq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_cvtss2siq:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
; SKYLAKE-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtss2siq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
; BTVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00]
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtss2siq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00]
; ZNVER1-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = insertelement <4 x float> undef, float %a0, i32 0
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1)
  %3 = load float, float *%a1, align 4
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4)
  %6 = add i64 %2, %5
  ret i64 %6
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

; Scheduling test for (V)CVTTSS2SI with a 32-bit destination.
; Uses plain `fptosi float -> i32` on %a0 (register operand) and on a float
; loaded from %a1 (memory operand); the two i32 results are added and
; returned. The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define i32 @test_cvttss2si(float %a0, float *%a1) {
; GENERIC-LABEL: test_cvttss2si:
; GENERIC: # BB#0:
; GENERIC-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
; GENERIC-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvttss2si:
; ATOM: # BB#0:
; ATOM-NEXT: cvttss2si (%rdi), %eax # sched: [9:4.50]
; ATOM-NEXT: cvttss2si %xmm0, %ecx # sched: [8:4.00]
; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvttss2si:
; SLM: # BB#0:
; SLM-NEXT: cvttss2si (%rdi), %eax # sched: [7:1.00]
; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50]
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvttss2si:
; SANDY: # BB#0:
; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvttss2si:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_cvttss2si:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
; SKYLAKE-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvttss2si:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvttss2si:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00]
; ZNVER1-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fptosi float %a0 to i32
  %2 = load float, float *%a1, align 4
  %3 = fptosi float %2 to i32
  %4 = add i32 %1, %3
  ret i32 %4
}

; Scheduling test for (V)CVTTSS2SI with a 64-bit destination.
; Uses plain `fptosi float -> i64` on %a0 (register operand) and on a float
; loaded from %a1 (memory operand); the two i64 results are added and
; returned. The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define i64 @test_cvttss2siq(float %a0, float *%a1) {
; GENERIC-LABEL: test_cvttss2siq:
; GENERIC: # BB#0:
; GENERIC-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
; GENERIC-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvttss2siq:
; ATOM: # BB#0:
; ATOM-NEXT: cvttss2si (%rdi), %rax # sched: [10:5.00]
; ATOM-NEXT: cvttss2si %xmm0, %rcx # sched: [9:4.50]
; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvttss2siq:
; SLM: # BB#0:
; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00]
; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50]
; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvttss2siq:
; SANDY: # BB#0:
; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvttss2siq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_cvttss2siq:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
; SKYLAKE-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvttss2siq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvttss2siq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00]
; ZNVER1-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fptosi float %a0 to i64
  %2 = load float, float *%a1, align 4
  %3 = fptosi float %2 to i64
  %4 = add i64 %1, %3
  ret i64 %4
}

; Scheduling test for (V)DIVPS.
; Divides %a0 by %a1 (register operand), then divides that quotient by a
; <4 x float> loaded from %a2 with 16-byte alignment (memory operand).
; The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_divps:
; GENERIC: # BB#0:
; GENERIC-NEXT: divps %xmm1, %xmm0 # sched: [14:1.00]
; GENERIC-NEXT: divps (%rdi), %xmm0 # sched: [20:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_divps:
; ATOM: # BB#0:
; ATOM-NEXT: divps %xmm1, %xmm0 # sched: [70:35.00]
; ATOM-NEXT: divps (%rdi), %xmm0 # sched: [125:62.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_divps:
; SLM: # BB#0:
; SLM-NEXT: divps %xmm1, %xmm0 # sched: [34:34.00]
; SLM-NEXT: divps (%rdi), %xmm0 # sched: [37:34.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_divps:
; SANDY: # BB#0:
; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_divps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_divps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_divps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_divps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
; ZNVER1-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fdiv <4 x float> %a0, %a1
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = fdiv <4 x float> %1, %2
  ret <4 x float> %3
}

; Scheduling test for (V)DIVSS.
; Divides %a0 by %a1 (register operand), then divides that quotient by a
; float loaded from %a2 with 4-byte alignment (memory operand).
; The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define float @test_divss(float %a0, float %a1, float *%a2) {
; GENERIC-LABEL: test_divss:
; GENERIC: # BB#0:
; GENERIC-NEXT: divss %xmm1, %xmm0 # sched: [14:1.00]
; GENERIC-NEXT: divss (%rdi), %xmm0 # sched: [20:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_divss:
; ATOM: # BB#0:
; ATOM-NEXT: divss %xmm1, %xmm0 # sched: [34:17.00]
; ATOM-NEXT: divss (%rdi), %xmm0 # sched: [62:31.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_divss:
; SLM: # BB#0:
; SLM-NEXT: divss %xmm1, %xmm0 # sched: [34:34.00]
; SLM-NEXT: divss (%rdi), %xmm0 # sched: [37:34.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_divss:
; SANDY: # BB#0:
; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_divss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_divss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_divss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_divss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
; ZNVER1-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fdiv float %a0, %a1
  %2 = load float, float *%a2, align 4
  %3 = fdiv float %1, %2
  ret float %3
}

; Scheduling test for (V)LDMXCSR.
; %a0 is stored to a 4-byte stack slot and the slot's address (bitcast to i8*)
; is passed to @llvm.x86.sse.ldmxcsr, so the expected code is a store to the
; stack followed by an ldmxcsr from the same slot.
; The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define void @test_ldmxcsr(i32 %a0) {
; GENERIC-LABEL: test_ldmxcsr:
; GENERIC: # BB#0:
; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_ldmxcsr:
; ATOM: # BB#0:
; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:2.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_ldmxcsr:
; SLM: # BB#0:
; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_ldmxcsr:
; SANDY: # BB#0:
; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_ldmxcsr:
; HASWELL: # BB#0:
; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_ldmxcsr:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ldmxcsr:
; BTVER2: # BB#0:
; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ldmxcsr:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
; ZNVER1-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  store i32 %a0, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}
; Deliberately not `readnone`: the call consumes the i32 stored just above
; through its pointer argument and loads it into MXCSR, so it both reads
; memory and has a side effect.
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind

; Scheduling test for (V)MAXPS.
; @llvm.x86.sse.max.ps is applied to %a0 and %a1 (register operand), then to
; that result and a <4 x float> loaded from %a2 with 16-byte alignment
; (memory operand). The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_maxps:
; GENERIC: # BB#0:
; GENERIC-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_maxps:
; ATOM: # BB#0:
; ATOM-NEXT: maxps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: maxps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_maxps:
; SLM: # BB#0:
; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_maxps:
; SANDY: # BB#0:
; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_maxps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_maxps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maxps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_maxps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

; Scheduling test for (V)MAXSS.
; @llvm.x86.sse.max.ss is applied to %a0 and %a1 (register operand), then to
; that result and a <4 x float> loaded from %a2 with 16-byte alignment
; (memory operand). The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_maxss:
; GENERIC: # BB#0:
; GENERIC-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_maxss:
; ATOM: # BB#0:
; ATOM-NEXT: maxss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: maxss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_maxss:
; SLM: # BB#0:
; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_maxss:
; SANDY: # BB#0:
; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_maxss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_maxss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maxss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_maxss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone

; Scheduling test for (V)MINPS.
; @llvm.x86.sse.min.ps is applied to %a0 and %a1 (register operand), then to
; that result and a <4 x float> loaded from %a2 with 16-byte alignment
; (memory operand). The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_minps:
; GENERIC: # BB#0:
; GENERIC-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_minps:
; ATOM: # BB#0:
; ATOM-NEXT: minps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: minps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_minps:
; SLM: # BB#0:
; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_minps:
; SANDY: # BB#0:
; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_minps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_minps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_minps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_minps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

; Scheduling test for (V)MINSS.
; @llvm.x86.sse.min.ss is applied to %a0 and %a1 (register operand), then to
; that result and a <4 x float> loaded from %a2 with 16-byte alignment
; (memory operand). The assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of hand-editing.
define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_minss:
; GENERIC: # BB#0:
; GENERIC-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_minss:
; ATOM: # BB#0:
; ATOM-NEXT: minss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: minss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_minss:
; SLM: # BB#0:
; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_minss:
; SANDY: # BB#0:
; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_minss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_minss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_minss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_minss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone

; test_movaps: loads a <4 x float> from %a0 (align 16), adds the value to
; itself and stores the sum to %a1 (align 16). Every per-CPU check group
; expects an aligned (v)movaps load, a (v)addps and an aligned (v)movaps store,
; each followed by the '# sched' annotation printed under -print-schedule.
; NOTE(review): the fadd presumably keeps the load and the store from being
; combined into a plain copy - confirm.
1258define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
1259; GENERIC-LABEL: test_movaps:
1260; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001261; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
1262; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1263; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00]
1264; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001265;
1266; ATOM-LABEL: test_movaps:
1267; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001268; ATOM-NEXT: movaps (%rdi), %xmm0 # sched: [1:1.00]
1269; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
1270; ATOM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1271; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001272;
1273; SLM-LABEL: test_movaps:
1274; SLM: # BB#0:
1275; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00]
1276; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1277; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1278; SLM-NEXT: retq # sched: [4:1.00]
1279;
1280; SANDY-LABEL: test_movaps:
1281; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001282; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001283; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001284; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00]
1285; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001286;
1287; HASWELL-LABEL: test_movaps:
1288; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001289; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001290; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001291; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001292; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001293;
Gadi Haber767d98b2017-08-30 08:08:50 +00001294; SKYLAKE-LABEL: test_movaps:
1295; SKYLAKE: # BB#0:
1296; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
1297; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1298; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1299; SKYLAKE-NEXT: retq # sched: [2:1.00]
1300;
Simon Pilgrim93986492017-04-18 19:04:40 +00001301; BTVER2-LABEL: test_movaps:
1302; BTVER2: # BB#0:
1303; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
1304; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1305; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1306; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001307;
1308; ZNVER1-LABEL: test_movaps:
1309; ZNVER1: # BB#0:
1310; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50]
1311; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1312; ZNVER1-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001313; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001314 %1 = load <4 x float>, <4 x float> *%a0, align 16
1315 %2 = fadd <4 x float> %1, %1
1316 store <4 x float> %2, <4 x float> *%a1, align 16
1317 ret void
1318}
1319
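1320; TODO (v)movhlps - the SANDY, HASWELL, SKYLAKE, BTVER2 and ZNVER1 checks below currently expect vunpckhpd instead of vmovhlps.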
1321
; test_movhlps: returns a shuffle of %a0 and %a1 with mask <6, 7, 2, 3>, i.e.
; lanes 2-3 of %a1 followed by lanes 2-3 of %a0. The GENERIC, ATOM and SLM
; groups expect movhlps; the remaining groups expect vunpckhpd with the same
; lane layout. The ATOM group additionally expects six nop instructions
; between the shuffle and retq.
1322define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
1323; GENERIC-LABEL: test_movhlps:
1324; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001325; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1326; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001327;
1328; ATOM-LABEL: test_movhlps:
1329; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001330; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1331; ATOM-NEXT: nop # sched: [1:0.50]
1332; ATOM-NEXT: nop # sched: [1:0.50]
1333; ATOM-NEXT: nop # sched: [1:0.50]
1334; ATOM-NEXT: nop # sched: [1:0.50]
1335; ATOM-NEXT: nop # sched: [1:0.50]
1336; ATOM-NEXT: nop # sched: [1:0.50]
1337; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001338;
1339; SLM-LABEL: test_movhlps:
1340; SLM: # BB#0:
1341; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1342; SLM-NEXT: retq # sched: [4:1.00]
1343;
1344; SANDY-LABEL: test_movhlps:
1345; SANDY: # BB#0:
1346; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001347; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001348;
1349; HASWELL-LABEL: test_movhlps:
1350; HASWELL: # BB#0:
1351; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001352; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001353;
Gadi Haber767d98b2017-08-30 08:08:50 +00001354; SKYLAKE-LABEL: test_movhlps:
1355; SKYLAKE: # BB#0:
1356; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1357; SKYLAKE-NEXT: retq # sched: [2:1.00]
1358;
Simon Pilgrim93986492017-04-18 19:04:40 +00001359; BTVER2-LABEL: test_movhlps:
1360; BTVER2: # BB#0:
1361; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
1362; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001363;
1364; ZNVER1-LABEL: test_movhlps:
1365; ZNVER1: # BB#0:
1366; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001367; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001368 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1369 ret <4 x float> %1
1370}
1371
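1372; TODO (v)movhps - the checks below currently expect (v)movhpd for the load and movhlps+movlps or (v)pextrq for the store.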
1373
; test_movhps: reinterprets %a2 as a <2 x float> pointer, loads two floats
; (align 8), widens them to four lanes (upper two undef), places them in the
; upper half of %a1 (mask <0, 1, 4, 5>), adds %a0, then extracts lanes 2-3 of
; the sum and stores them back through the same pointer.
; The check groups expect the load to be selected as (v)movhpd. For the store,
; the GENERIC and ATOM groups expect movhlps followed by movlps, while the
; other groups expect (v)pextrq $1 straight to memory.
1374define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1375; GENERIC-LABEL: test_movhps:
1376; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001377; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
1378; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1379; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
1380; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
1381; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001382;
1383; ATOM-LABEL: test_movhps:
1384; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001385; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
1386; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1387; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
1388; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1389; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001390;
1391; SLM-LABEL: test_movhps:
1392; SLM: # BB#0:
1393; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
1394; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1395; SLM-NEXT: pextrq $1, %xmm1, (%rdi) # sched: [4:2.00]
1396; SLM-NEXT: retq # sched: [4:1.00]
1397;
1398; SANDY-LABEL: test_movhps:
1399; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001400; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001401; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1402; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001403; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001404;
1405; HASWELL-LABEL: test_movhps:
1406; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001407; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001408; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001409; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
1410; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001411;
Gadi Haber767d98b2017-08-30 08:08:50 +00001412; SKYLAKE-LABEL: test_movhps:
1413; SKYLAKE: # BB#0:
1414; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
1415; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1416; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
1417; SKYLAKE-NEXT: retq # sched: [2:1.00]
1418;
Simon Pilgrim93986492017-04-18 19:04:40 +00001419; BTVER2-LABEL: test_movhps:
1420; BTVER2: # BB#0:
1421; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1422; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1423; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
1424; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001425;
1426; ZNVER1-LABEL: test_movhps:
1427; ZNVER1: # BB#0:
1428; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
1429; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1430; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [8:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001431; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001432 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1433 %2 = load <2 x float>, <2 x float> *%1, align 8
1434 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1435 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1436 %5 = fadd <4 x float> %a0, %4
1437 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1438 store <2 x float> %6, <2 x float>* %1
1439 ret void
1440}
1441
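1442; TODO (v)movlhps - NOTE(review): every check group below already expects (v)movlhps, so this TODO looks stale; confirm before removing.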
1443
; test_movlhps: shuffles %a0 and %a1 with mask <0, 1, 4, 5> (lanes 0-1 of %a0
; followed by lanes 0-1 of %a1), then returns %a1 + that shuffle. Every check
; group expects a (v)movlhps followed by a (v)addps.
; NOTE(review): the fadd presumably steers the shuffle into the
; single-precision form of the instruction - confirm.
1444define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
1445; GENERIC-LABEL: test_movlhps:
1446; GENERIC: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001447; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim84846982017-08-01 15:14:35 +00001448; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1449; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001450;
1451; ATOM-LABEL: test_movlhps:
1452; ATOM: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001453; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001454; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
1455; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001456;
1457; SLM-LABEL: test_movlhps:
1458; SLM: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001459; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001460; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1461; SLM-NEXT: retq # sched: [4:1.00]
1462;
1463; SANDY-LABEL: test_movlhps:
1464; SANDY: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001465; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001466; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001467; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001468;
1469; HASWELL-LABEL: test_movlhps:
1470; HASWELL: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001471; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001472; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001473; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001474;
Gadi Haber767d98b2017-08-30 08:08:50 +00001475; SKYLAKE-LABEL: test_movlhps:
1476; SKYLAKE: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001477; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001478; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1479; SKYLAKE-NEXT: retq # sched: [2:1.00]
1480;
Simon Pilgrim93986492017-04-18 19:04:40 +00001481; BTVER2-LABEL: test_movlhps:
1482; BTVER2: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001483; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001484; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1485; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001486;
1487; ZNVER1-LABEL: test_movlhps:
1488; ZNVER1: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001489; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
Craig Topper106b5b62017-07-19 02:45:14 +00001490; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001491; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001492 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1493 %2 = fadd <4 x float> %a1, %1
1494 ret <4 x float> %2
1495}
1496
; test_movlps: reinterprets %a2 as a <2 x float> pointer, loads two floats
; (align 8), widens them to four lanes (upper two undef), places them in the
; lower half of %a1 (mask <4, 5, 2, 3>), adds %a0, then extracts lanes 0-1 of
; the sum and stores them back through the same pointer.
; Every check group expects the load as (v)movlpd, then (v)addps, then a
; (v)movlps store.
1497define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1498; GENERIC-LABEL: test_movlps:
1499; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001500; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
1501; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1502; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
1503; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001504;
1505; ATOM-LABEL: test_movlps:
1506; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001507; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
1508; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1509; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1510; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001511;
1512; SLM-LABEL: test_movlps:
1513; SLM: # BB#0:
1514; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
1515; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1516; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1517; SLM-NEXT: retq # sched: [4:1.00]
1518;
1519; SANDY-LABEL: test_movlps:
1520; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001521; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001522; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001523; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00]
1524; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001525;
1526; HASWELL-LABEL: test_movlps:
1527; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001528; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001529; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001530; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001531; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001532;
Gadi Haber767d98b2017-08-30 08:08:50 +00001533; SKYLAKE-LABEL: test_movlps:
1534; SKYLAKE: # BB#0:
1535; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
1536; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1537; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1538; SKYLAKE-NEXT: retq # sched: [2:1.00]
1539;
Simon Pilgrim93986492017-04-18 19:04:40 +00001540; BTVER2-LABEL: test_movlps:
1541; BTVER2: # BB#0:
1542; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1543; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1544; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1545; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001546;
1547; ZNVER1-LABEL: test_movlps:
1548; ZNVER1: # BB#0:
1549; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
1550; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1551; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001552; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001553 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1554 %2 = load <2 x float>, <2 x float> *%1, align 8
1555 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1556 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1557 %5 = fadd <4 x float> %a0, %4
1558 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1559 store <2 x float> %6, <2 x float>* %1
1560 ret void
1561}
1562
; test_movmskps: returns the i32 result of calling @llvm.x86.sse.movmsk.ps on
; %a0. Every check group expects a single (v)movmskps from %xmm0 into %eax;
; the ATOM group additionally expects two nop instructions before retq.
; The intrinsic itself is declared immediately after the function body.
1563define i32 @test_movmskps(<4 x float> %a0) {
1564; GENERIC-LABEL: test_movmskps:
1565; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001566; GENERIC-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
1567; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001568;
1569; ATOM-LABEL: test_movmskps:
1570; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001571; ATOM-NEXT: movmskps %xmm0, %eax # sched: [3:3.00]
1572; ATOM-NEXT: nop # sched: [1:0.50]
1573; ATOM-NEXT: nop # sched: [1:0.50]
1574; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001575;
1576; SLM-LABEL: test_movmskps:
1577; SLM: # BB#0:
1578; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50]
1579; SLM-NEXT: retq # sched: [4:1.00]
1580;
1581; SANDY-LABEL: test_movmskps:
1582; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001583; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
1584; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001585;
1586; HASWELL-LABEL: test_movmskps:
1587; HASWELL: # BB#0:
1588; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001589; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001590;
Gadi Haber767d98b2017-08-30 08:08:50 +00001591; SKYLAKE-LABEL: test_movmskps:
1592; SKYLAKE: # BB#0:
1593; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
1594; SKYLAKE-NEXT: retq # sched: [2:1.00]
1595;
Simon Pilgrim93986492017-04-18 19:04:40 +00001596; BTVER2-LABEL: test_movmskps:
1597; BTVER2: # BB#0:
1598; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.50]
1599; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001600;
1601; ZNVER1-LABEL: test_movmskps:
1602; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001603; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:1.00]
1604; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001605 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
1606 ret i32 %1
1607}
1608declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
1609
; test_movntps: stores %a0 to %a1 (align 16) with !nontemporal metadata
; attached. Every check group expects a single (v)movntps store; the ATOM
; group additionally expects six nop instructions before retq.
; The !0 metadata node is defined outside this part of the file.
1610define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
1611; GENERIC-LABEL: test_movntps:
1612; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001613; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00]
1614; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001615;
1616; ATOM-LABEL: test_movntps:
1617; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001618; ATOM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
1619; ATOM-NEXT: nop # sched: [1:0.50]
1620; ATOM-NEXT: nop # sched: [1:0.50]
1621; ATOM-NEXT: nop # sched: [1:0.50]
1622; ATOM-NEXT: nop # sched: [1:0.50]
1623; ATOM-NEXT: nop # sched: [1:0.50]
1624; ATOM-NEXT: nop # sched: [1:0.50]
1625; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001626;
1627; SLM-LABEL: test_movntps:
1628; SLM: # BB#0:
1629; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
1630; SLM-NEXT: retq # sched: [4:1.00]
1631;
1632; SANDY-LABEL: test_movntps:
1633; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001634; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00]
1635; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001636;
1637; HASWELL-LABEL: test_movntps:
1638; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00001639; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001640; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001641;
Gadi Haber767d98b2017-08-30 08:08:50 +00001642; SKYLAKE-LABEL: test_movntps:
1643; SKYLAKE: # BB#0:
1644; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1645; SKYLAKE-NEXT: retq # sched: [2:1.00]
1646;
Simon Pilgrim93986492017-04-18 19:04:40 +00001647; BTVER2-LABEL: test_movntps:
1648; BTVER2: # BB#0:
1649; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1650; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001651;
1652; ZNVER1-LABEL: test_movntps:
1653; ZNVER1: # BB#0:
1654; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001655; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001656 store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0
1657 ret void
1658}
1659
; test_movss_mem: loads a scalar float from %a0 (align 1), adds it to itself
; and stores the sum to %a1 (align 1). Every check group expects a (v)movss
; load (shown as mem[0] with the upper three lanes zero), a (v)addss and a
; (v)movss store.
1660define void @test_movss_mem(float* %a0, float* %a1) {
1661; GENERIC-LABEL: test_movss_mem:
1662; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001663; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
1664; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
1665; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00]
1666; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001667;
1668; ATOM-LABEL: test_movss_mem:
1669; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001670; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00]
1671; ATOM-NEXT: addss %xmm0, %xmm0 # sched: [5:5.00]
1672; ATOM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
1673; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001674;
1675; SLM-LABEL: test_movss_mem:
1676; SLM: # BB#0:
1677; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
1678; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
1679; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
1680; SLM-NEXT: retq # sched: [4:1.00]
1681;
1682; SANDY-LABEL: test_movss_mem:
1683; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001684; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001685; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001686; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00]
1687; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001688;
1689; HASWELL-LABEL: test_movss_mem:
1690; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001691; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001692; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001693; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001694; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001695;
Gadi Haber767d98b2017-08-30 08:08:50 +00001696; SKYLAKE-LABEL: test_movss_mem:
1697; SKYLAKE: # BB#0:
1698; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
1699; SKYLAKE-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1700; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
1701; SKYLAKE-NEXT: retq # sched: [2:1.00]
1702;
Simon Pilgrim93986492017-04-18 19:04:40 +00001703; BTVER2-LABEL: test_movss_mem:
1704; BTVER2: # BB#0:
1705; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
1706; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1707; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
1708; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001709;
1710; ZNVER1-LABEL: test_movss_mem:
1711; ZNVER1: # BB#0:
1712; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
1713; ZNVER1-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1714; ZNVER1-NEXT: vmovss %xmm0, (%rsi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001715; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001716 %1 = load float, float* %a0, align 1
1717 %2 = fadd float %1, %1
1718 store float %2, float *%a1, align 1
1719 ret void
1720}
1721
; test_movss_reg: returns a shuffle of %a0 and %a1 with mask <4, 1, 2, 3>,
; i.e. lane 0 of %a1 followed by lanes 1-3 of %a0. The GENERIC and ATOM groups
; expect a register-to-register movss; the SLM group expects blendps and the
; remaining groups expect vblendps with the same lane layout. The ATOM group
; additionally expects six nop instructions before retq.
1722define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
1723; GENERIC-LABEL: test_movss_reg:
1724; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001725; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
1726; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001727;
1728; ATOM-LABEL: test_movss_reg:
1729; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001730; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
1731; ATOM-NEXT: nop # sched: [1:0.50]
1732; ATOM-NEXT: nop # sched: [1:0.50]
1733; ATOM-NEXT: nop # sched: [1:0.50]
1734; ATOM-NEXT: nop # sched: [1:0.50]
1735; ATOM-NEXT: nop # sched: [1:0.50]
1736; ATOM-NEXT: nop # sched: [1:0.50]
1737; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001738;
1739; SLM-LABEL: test_movss_reg:
1740; SLM: # BB#0:
1741; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
1742; SLM-NEXT: retq # sched: [4:1.00]
1743;
1744; SANDY-LABEL: test_movss_reg:
1745; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00001746; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001747; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001748;
1749; HASWELL-LABEL: test_movss_reg:
1750; HASWELL: # BB#0:
1751; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001752; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001753;
Gadi Haber767d98b2017-08-30 08:08:50 +00001754; SKYLAKE-LABEL: test_movss_reg:
1755; SKYLAKE: # BB#0:
1756; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
1757; SKYLAKE-NEXT: retq # sched: [2:1.00]
1758;
Simon Pilgrim93986492017-04-18 19:04:40 +00001759; BTVER2-LABEL: test_movss_reg:
1760; BTVER2: # BB#0:
1761; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
1762; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001763;
1764; ZNVER1-LABEL: test_movss_reg:
1765; ZNVER1: # BB#0:
1766; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001767; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001768 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1769 ret <4 x float> %1
1770}
1771
; test_movups: loads a <4 x float> from %a0 with align 1, adds the value to
; itself and stores the sum to %a1 with align 1. Because of the byte alignment
; every check group expects the unaligned (v)movups form for both the load and
; the store, with a (v)addps in between.
1772define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
1773; GENERIC-LABEL: test_movups:
1774; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001775; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
1776; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1777; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00]
1778; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001779;
1780; ATOM-LABEL: test_movups:
1781; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001782; ATOM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.50]
1783; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
1784; ATOM-NEXT: movups %xmm0, (%rsi) # sched: [2:1.00]
1785; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001786;
1787; SLM-LABEL: test_movups:
1788; SLM: # BB#0:
1789; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00]
1790; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1791; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
1792; SLM-NEXT: retq # sched: [4:1.00]
1793;
1794; SANDY-LABEL: test_movups:
1795; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001796; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001797; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001798; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
1799; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001800;
1801; HASWELL-LABEL: test_movups:
1802; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001803; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001804; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001805; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001806; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001807;
Gadi Haber767d98b2017-08-30 08:08:50 +00001808; SKYLAKE-LABEL: test_movups:
1809; SKYLAKE: # BB#0:
1810; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
1811; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1812; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
1813; SKYLAKE-NEXT: retq # sched: [2:1.00]
1814;
Simon Pilgrim93986492017-04-18 19:04:40 +00001815; BTVER2-LABEL: test_movups:
1816; BTVER2: # BB#0:
1817; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
1818; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1819; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
1820; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001821;
1822; ZNVER1-LABEL: test_movups:
1823; ZNVER1: # BB#0:
1824; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50]
1825; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1826; ZNVER1-NEXT: vmovups %xmm0, (%rsi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001827; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001828 %1 = load <4 x float>, <4 x float> *%a0, align 1
1829 %2 = fadd <4 x float> %1, %1
1830 store <4 x float> %2, <4 x float> *%a1, align 1
1831 ret void
1832}
1833
; test_mulps: multiplies %a0 by %a1, then multiplies that product by a
; <4 x float> loaded from %a2 (align 16), and returns the result. Every check
; group expects two (v)mulps instructions: first the register-register form,
; then the form with the (%rdi) memory operand, so both variants get a
; scheduling annotation.
1834define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1835; GENERIC-LABEL: test_mulps:
1836; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001837; GENERIC-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
1838; GENERIC-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
1839; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001840;
1841; ATOM-LABEL: test_mulps:
1842; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001843; ATOM-NEXT: mulps %xmm1, %xmm0 # sched: [5:5.00]
1844; ATOM-NEXT: mulps (%rdi), %xmm0 # sched: [10:5.00]
1845; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001846;
1847; SLM-LABEL: test_mulps:
1848; SLM: # BB#0:
1849; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00]
1850; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00]
1851; SLM-NEXT: retq # sched: [4:1.00]
1852;
1853; SANDY-LABEL: test_mulps:
1854; SANDY: # BB#0:
1855; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001856; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
1857; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001858;
1859; HASWELL-LABEL: test_mulps:
1860; HASWELL: # BB#0:
1861; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001862; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
1863; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001864;
Gadi Haber767d98b2017-08-30 08:08:50 +00001865; SKYLAKE-LABEL: test_mulps:
1866; SKYLAKE: # BB#0:
1867; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
1868; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
1869; SKYLAKE-NEXT: retq # sched: [2:1.00]
1870;
Simon Pilgrim93986492017-04-18 19:04:40 +00001871; BTVER2-LABEL: test_mulps:
1872; BTVER2: # BB#0:
1873; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
1874; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
1875; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001876;
1877; ZNVER1-LABEL: test_mulps:
1878; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001879; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
1880; ZNVER1-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
1881; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001882 %1 = fmul <4 x float> %a0, %a1
1883 %2 = load <4 x float>, <4 x float> *%a2, align 16
1884 %3 = fmul <4 x float> %1, %2
1885 ret <4 x float> %3
1886}
1887
; test_mulss: scalar single-precision multiply.
; The IR multiplies the two float arguments and then multiplies that product
; by a float loaded (align 4) from %a2. Every target block below expects one
; multiply with a register source followed by one with the (%rdi) memory
; source, each carrying the '# sched' annotation emitted under -print-schedule.
1888define float @test_mulss(float %a0, float %a1, float *%a2) {
1889; GENERIC-LABEL: test_mulss:
1890; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001891; GENERIC-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
1892; GENERIC-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
1893; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001894;
1895; ATOM-LABEL: test_mulss:
1896; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001897; ATOM-NEXT: mulss %xmm1, %xmm0 # sched: [4:4.00]
1898; ATOM-NEXT: mulss (%rdi), %xmm0 # sched: [5:5.00]
1899; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001900;
1901; SLM-LABEL: test_mulss:
1902; SLM: # BB#0:
1903; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00]
1904; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00]
1905; SLM-NEXT: retq # sched: [4:1.00]
1906;
1907; SANDY-LABEL: test_mulss:
1908; SANDY: # BB#0:
1909; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001910; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
1911; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001912;
1913; HASWELL-LABEL: test_mulss:
1914; HASWELL: # BB#0:
1915; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001916; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
1917; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001918;
Gadi Haber767d98b2017-08-30 08:08:50 +00001919; SKYLAKE-LABEL: test_mulss:
1920; SKYLAKE: # BB#0:
1921; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
1922; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
1923; SKYLAKE-NEXT: retq # sched: [2:1.00]
1924;
Simon Pilgrim93986492017-04-18 19:04:40 +00001925; BTVER2-LABEL: test_mulss:
1926; BTVER2: # BB#0:
1927; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
1928; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
1929; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001930;
1931; ZNVER1-LABEL: test_mulss:
1932; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001933; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
1934; ZNVER1-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
1935; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register multiply first ...
Simon Pilgrim93986492017-04-18 19:04:40 +00001936 %1 = fmul float %a0, %a1
; ... then a multiply whose second operand comes straight from memory.
1937 %2 = load float, float *%a2, align 4
1938 %3 = fmul float %1, %2
1939 ret float %3
1940}
1941
; test_orps: bitwise OR on packed single-precision registers.
; The IR has no floating-point 'or', so the float vectors are bitcast to
; <4 x i32>, OR-ed, and bitcast back. The first OR combines %a0 and %a1; the
; second combines that result with a vector loaded (align 16) from %a2. The
; expected output below is an orps/vorps with a register source followed by
; one with the (%rdi) memory source. The ATOM expectations additionally list
; four nop instructions ahead of retq.
1942define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1943; GENERIC-LABEL: test_orps:
1944; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001945; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
1946; GENERIC-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
1947; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001948;
1949; ATOM-LABEL: test_orps:
1950; ATOM: # BB#0:
Simon Pilgrim486072d2017-08-01 17:51:20 +00001951; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
1952; ATOM-NEXT: orps (%rdi), %xmm0 # sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001953; ATOM-NEXT: nop # sched: [1:0.50]
1954; ATOM-NEXT: nop # sched: [1:0.50]
1955; ATOM-NEXT: nop # sched: [1:0.50]
1956; ATOM-NEXT: nop # sched: [1:0.50]
1957; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001958;
1959; SLM-LABEL: test_orps:
1960; SLM: # BB#0:
1961; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
1962; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00]
1963; SLM-NEXT: retq # sched: [4:1.00]
1964;
1965; SANDY-LABEL: test_orps:
1966; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001967; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
1968; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
1969; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001970;
1971; HASWELL-LABEL: test_orps:
1972; HASWELL: # BB#0:
1973; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001974; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
1975; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001976;
Gadi Haber767d98b2017-08-30 08:08:50 +00001977; SKYLAKE-LABEL: test_orps:
1978; SKYLAKE: # BB#0:
1979; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
1980; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
1981; SKYLAKE-NEXT: retq # sched: [2:1.00]
1982;
Simon Pilgrim93986492017-04-18 19:04:40 +00001983; BTVER2-LABEL: test_orps:
1984; BTVER2: # BB#0:
1985; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1986; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1987; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001988;
1989; ZNVER1-LABEL: test_orps:
1990; ZNVER1: # BB#0:
1991; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1992; ZNVER1-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001993; ZNVER1-NEXT: retq # sched: [1:0.50]
; Reinterpret both register operands as integers and OR them.
Simon Pilgrim93986492017-04-18 19:04:40 +00001994 %1 = bitcast <4 x float> %a0 to <4 x i32>
1995 %2 = bitcast <4 x float> %a1 to <4 x i32>
1996 %3 = or <4 x i32> %1, %2
; Second OR takes its other operand from memory.
1997 %4 = load <4 x float>, <4 x float> *%a2, align 16
1998 %5 = bitcast <4 x float> %4 to <4 x i32>
1999 %6 = or <4 x i32> %3, %5
; Return the result with its original float vector type.
2000 %7 = bitcast <4 x i32> %6 to <4 x float>
2001 ret <4 x float> %7
2002}
2003
; test_prefetchnta: a single call to @llvm.prefetch on %a0 followed by a
; void return. With the argument triple (0, 0, 1) used here, every target
; block below expects a 'prefetchnta (%rdi)' instruction. The ATOM
; expectations additionally list six nop instructions ahead of retq.
; NOTE(review): the three i32 arguments presumably mean rw=read, locality=0
; and cache-type=data - confirm against the LangRef entry for llvm.prefetch.
2004define void @test_prefetchnta(i8* %a0) {
2005; GENERIC-LABEL: test_prefetchnta:
2006; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002007; GENERIC-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2008; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002009;
2010; ATOM-LABEL: test_prefetchnta:
2011; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002012; ATOM-NEXT: prefetchnta (%rdi) # sched: [1:1.00]
2013; ATOM-NEXT: nop # sched: [1:0.50]
2014; ATOM-NEXT: nop # sched: [1:0.50]
2015; ATOM-NEXT: nop # sched: [1:0.50]
2016; ATOM-NEXT: nop # sched: [1:0.50]
2017; ATOM-NEXT: nop # sched: [1:0.50]
2018; ATOM-NEXT: nop # sched: [1:0.50]
2019; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002020;
2021; SLM-LABEL: test_prefetchnta:
2022; SLM: # BB#0:
2023; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
2024; SLM-NEXT: retq # sched: [4:1.00]
2025;
2026; SANDY-LABEL: test_prefetchnta:
2027; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002028; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2029; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002030;
2031; HASWELL-LABEL: test_prefetchnta:
2032; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002033; HASWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
2034; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002035;
Gadi Haber767d98b2017-08-30 08:08:50 +00002036; SKYLAKE-LABEL: test_prefetchnta:
2037; SKYLAKE: # BB#0:
2038; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
2039; SKYLAKE-NEXT: retq # sched: [2:1.00]
2040;
Simon Pilgrim93986492017-04-18 19:04:40 +00002041; BTVER2-LABEL: test_prefetchnta:
2042; BTVER2: # BB#0:
2043; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00]
2044; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002045;
2046; ZNVER1-LABEL: test_prefetchnta:
2047; ZNVER1: # BB#0:
2048; ZNVER1-NEXT: prefetchnta (%rdi) # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002049; ZNVER1-NEXT: retq # sched: [1:0.50]
; The only operation in the function: prefetch the byte pointer argument.
Simon Pilgrim93986492017-04-18 19:04:40 +00002050 call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
2051 ret void
2052}
2053declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone
2054
; test_rcpps: packed reciprocal approximation via @llvm.x86.sse.rcp.ps.
; The intrinsic is applied once to the register argument %a0 and once to a
; vector loaded (align 16) from %a1; the two results are then added so both
; calls stay live. Each target block below expects one rcpps/vrcpps with a
; register source, one with the (%rdi) memory source, and an addps/vaddps.
; The order of the two reciprocal instructions differs between targets, and
; the ATOM and SLM expectations end with an extra movaps into %xmm0.
2055define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
2056; GENERIC-LABEL: test_rcpps:
2057; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002058; GENERIC-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
2059; GENERIC-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
2060; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2061; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002062;
2063; ATOM-LABEL: test_rcpps:
2064; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002065; ATOM-NEXT: rcpps (%rdi), %xmm1 # sched: [10:5.00]
2066; ATOM-NEXT: rcpps %xmm0, %xmm0 # sched: [9:4.50]
2067; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
2068; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
2069; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002070;
2071; SLM-LABEL: test_rcpps:
2072; SLM: # BB#0:
2073; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00]
2074; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00]
2075; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2076; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2077; SLM-NEXT: retq # sched: [4:1.00]
2078;
2079; SANDY-LABEL: test_rcpps:
2080; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00002081; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002082; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002083; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002084; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002085;
2086; HASWELL-LABEL: test_rcpps:
2087; HASWELL: # BB#0:
2088; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002089; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002090; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002091; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002092;
Gadi Haber767d98b2017-08-30 08:08:50 +00002093; SKYLAKE-LABEL: test_rcpps:
2094; SKYLAKE: # BB#0:
2095; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
2096; SKYLAKE-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00]
2097; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2098; SKYLAKE-NEXT: retq # sched: [2:1.00]
2099;
Simon Pilgrim93986492017-04-18 19:04:40 +00002100; BTVER2-LABEL: test_rcpps:
2101; BTVER2: # BB#0:
2102; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
2103; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00]
2104; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2105; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002106;
2107; ZNVER1-LABEL: test_rcpps:
2108; ZNVER1: # BB#0:
2109; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50]
2110; ZNVER1-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:0.50]
2111; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002112; ZNVER1-NEXT: retq # sched: [1:0.50]
; Reciprocal of the register argument.
Simon Pilgrim93986492017-04-18 19:04:40 +00002113 %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
; Reciprocal of a value that comes from memory.
2114 %2 = load <4 x float>, <4 x float> *%a1, align 16
2115 %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2)
; Sum the two results so that both intrinsic calls feed the return value.
2116 %4 = fadd <4 x float> %1, %3
2117 ret <4 x float> %4
2118}
2119declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
2120
2121; TODO - rcpss_m
2122
; test_rcpss: scalar reciprocal approximation via @llvm.x86.sse.rcp.ss.
; Each scalar input (the float argument %a0 and a float loaded from %a1) is
; placed in lane 0 of an otherwise undef <4 x float> before the intrinsic is
; called; the two vector results are then added. In every target block below
; the load appears as a separate movss/vmovss and both rcpss/vrcpss
; instructions use register sources, i.e. no expectation here covers a
; memory-operand rcpss. The two ATOM rcpss lines carry no '# sched' suffix.
2123define <4 x float> @test_rcpss(float %a0, float *%a1) {
2124; GENERIC-LABEL: test_rcpss:
2125; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002126; GENERIC-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
2127; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2128; GENERIC-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
2129; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2130; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002131;
2132; ATOM-LABEL: test_rcpss:
2133; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002134; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002135; ATOM-NEXT: rcpss %xmm0, %xmm0
2136; ATOM-NEXT: rcpss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002137; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2138; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002139;
2140; SLM-LABEL: test_rcpss:
2141; SLM: # BB#0:
2142; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
2143; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [8:1.00]
2144; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [8:1.00]
2145; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2146; SLM-NEXT: retq # sched: [4:1.00]
2147;
2148; SANDY-LABEL: test_rcpss:
2149; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00002150; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002151; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
Gadi Haberbed2c502017-08-13 13:59:24 +00002152; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002153; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002154; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002155;
2156; HASWELL-LABEL: test_rcpss:
2157; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002158; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2159; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
2160; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002161; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002162; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002163;
Gadi Haber767d98b2017-08-30 08:08:50 +00002164; SKYLAKE-LABEL: test_rcpss:
2165; SKYLAKE: # BB#0:
2166; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2167; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
2168; SKYLAKE-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2169; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2170; SKYLAKE-NEXT: retq # sched: [2:1.00]
2171;
Simon Pilgrim93986492017-04-18 19:04:40 +00002172; BTVER2-LABEL: test_rcpss:
2173; BTVER2: # BB#0:
2174; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
2175; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
2176; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
2177; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2178; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002179;
2180; ZNVER1-LABEL: test_rcpss:
2181; ZNVER1: # BB#0:
2182; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
2183; ZNVER1-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [12:0.50]
2184; ZNVER1-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [12:0.50]
2185; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002186; ZNVER1-NEXT: retq # sched: [1:0.50]
; Scalar argument goes into lane 0 of an undef vector for the intrinsic.
Simon Pilgrim93986492017-04-18 19:04:40 +00002187 %1 = insertelement <4 x float> undef, float %a0, i32 0
2188 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1)
; Same again for a scalar read from memory.
2189 %3 = load float, float *%a1, align 4
2190 %4 = insertelement <4 x float> undef, float %3, i32 0
2191 %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
; Sum the two results so that both intrinsic calls feed the return value.
2192 %6 = fadd <4 x float> %2, %5
2193 ret <4 x float> %6
2194}
2195declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
2196
; test_rsqrtps: packed reciprocal square-root approximation via
; @llvm.x86.sse.rsqrt.ps. The intrinsic is applied once to the register
; argument %a0 and once to a vector loaded (align 16) from %a1; the two
; results are then added so both calls stay live. Each target block below
; expects one rsqrtps/vrsqrtps with a register source, one with the (%rdi)
; memory source, and an addps/vaddps. The order of the two rsqrt instructions
; differs between targets, and the ATOM and SLM expectations end with an
; extra movaps into %xmm0.
2197define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
2198; GENERIC-LABEL: test_rsqrtps:
2199; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002200; GENERIC-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
2201; GENERIC-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
2202; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2203; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002204;
2205; ATOM-LABEL: test_rsqrtps:
2206; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002207; ATOM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [10:5.00]
2208; ATOM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [9:4.50]
2209; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
2210; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
2211; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002212;
2213; SLM-LABEL: test_rsqrtps:
2214; SLM: # BB#0:
2215; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00]
2216; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00]
2217; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2218; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2219; SLM-NEXT: retq # sched: [4:1.00]
2220;
2221; SANDY-LABEL: test_rsqrtps:
2222; SANDY: # BB#0:
2223; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002224; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002225; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002226; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002227;
2228; HASWELL-LABEL: test_rsqrtps:
2229; HASWELL: # BB#0:
2230; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002231; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002232; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002233; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002234;
Gadi Haber767d98b2017-08-30 08:08:50 +00002235; SKYLAKE-LABEL: test_rsqrtps:
2236; SKYLAKE: # BB#0:
2237; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
2238; SKYLAKE-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
2239; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2240; SKYLAKE-NEXT: retq # sched: [2:1.00]
2241;
Simon Pilgrim93986492017-04-18 19:04:40 +00002242; BTVER2-LABEL: test_rsqrtps:
2243; BTVER2: # BB#0:
2244; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
2245; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00]
2246; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2247; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002248;
2249; ZNVER1-LABEL: test_rsqrtps:
2250; ZNVER1: # BB#0:
2251; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50]
2252; ZNVER1-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:0.50]
2253; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002254; ZNVER1-NEXT: retq # sched: [1:0.50]
; Reciprocal square root of the register argument.
Simon Pilgrim93986492017-04-18 19:04:40 +00002255 %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
; Reciprocal square root of a value that comes from memory.
2256 %2 = load <4 x float>, <4 x float> *%a1, align 16
2257 %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2)
; Sum the two results so that both intrinsic calls feed the return value.
2258 %4 = fadd <4 x float> %1, %3
2259 ret <4 x float> %4
2260}
2261declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
2262
2263; TODO - rsqrtss_m
2264
; test_rsqrtss: scalar reciprocal square-root approximation via
; @llvm.x86.sse.rsqrt.ss. Each scalar input (the float argument %a0 and a
; float loaded from %a1) is placed in lane 0 of an otherwise undef
; <4 x float> before the intrinsic is called; the two vector results are then
; added. In every target block below the load appears as a separate
; movss/vmovss and both rsqrtss/vrsqrtss instructions use register sources,
; i.e. no expectation here covers a memory-operand rsqrtss. The two ATOM
; rsqrtss lines carry no '# sched' suffix.
2265define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
2266; GENERIC-LABEL: test_rsqrtss:
2267; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002268; GENERIC-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
2269; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2270; GENERIC-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
2271; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2272; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002273;
2274; ATOM-LABEL: test_rsqrtss:
2275; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002276; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002277; ATOM-NEXT: rsqrtss %xmm0, %xmm0
2278; ATOM-NEXT: rsqrtss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002279; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2280; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002281;
2282; SLM-LABEL: test_rsqrtss:
2283; SLM: # BB#0:
2284; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
2285; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [8:1.00]
2286; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [8:1.00]
2287; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2288; SLM-NEXT: retq # sched: [4:1.00]
2289;
2290; SANDY-LABEL: test_rsqrtss:
2291; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002292; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2293; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2294; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002295; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002296; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002297;
2298; HASWELL-LABEL: test_rsqrtss:
2299; HASWELL: # BB#0:
2300; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002301; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002302; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2303; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002304; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002305;
Gadi Haber767d98b2017-08-30 08:08:50 +00002306; SKYLAKE-LABEL: test_rsqrtss:
2307; SKYLAKE: # BB#0:
2308; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2309; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
2310; SKYLAKE-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2311; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2312; SKYLAKE-NEXT: retq # sched: [2:1.00]
2313;
Simon Pilgrim93986492017-04-18 19:04:40 +00002314; BTVER2-LABEL: test_rsqrtss:
2315; BTVER2: # BB#0:
2316; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
2317; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
2318; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
2319; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2320; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002321;
2322; ZNVER1-LABEL: test_rsqrtss:
2323; ZNVER1: # BB#0:
2324; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002325; ZNVER1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:0.50]
2326; ZNVER1-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:0.50]
Craig Topper106b5b62017-07-19 02:45:14 +00002327; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002328; ZNVER1-NEXT: retq # sched: [1:0.50]
; Scalar argument goes into lane 0 of an undef vector for the intrinsic.
Simon Pilgrim93986492017-04-18 19:04:40 +00002329 %1 = insertelement <4 x float> undef, float %a0, i32 0
2330 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1)
; Same again for a scalar read from memory.
2331 %3 = load float, float *%a1, align 4
2332 %4 = insertelement <4 x float> undef, float %3, i32 0
2333 %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
; Sum the two results so that both intrinsic calls feed the return value.
2334 %6 = fadd <4 x float> %2, %5
2335 ret <4 x float> %6
2336}
2337declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
2338
; test_sfence: a single call to @llvm.x86.sse.sfence followed by a void
; return; the function takes no arguments. Every target block below expects
; one 'sfence' instruction and then retq. The ATOM expectations additionally
; list six nop instructions ahead of retq.
2339define void @test_sfence() {
2340; GENERIC-LABEL: test_sfence:
2341; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002342; GENERIC-NEXT: sfence # sched: [1:1.00]
2343; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002344;
2345; ATOM-LABEL: test_sfence:
2346; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002347; ATOM-NEXT: sfence # sched: [1:1.00]
2348; ATOM-NEXT: nop # sched: [1:0.50]
2349; ATOM-NEXT: nop # sched: [1:0.50]
2350; ATOM-NEXT: nop # sched: [1:0.50]
2351; ATOM-NEXT: nop # sched: [1:0.50]
2352; ATOM-NEXT: nop # sched: [1:0.50]
2353; ATOM-NEXT: nop # sched: [1:0.50]
2354; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002355;
2356; SLM-LABEL: test_sfence:
2357; SLM: # BB#0:
2358; SLM-NEXT: sfence # sched: [1:1.00]
2359; SLM-NEXT: retq # sched: [4:1.00]
2360;
2361; SANDY-LABEL: test_sfence:
2362; SANDY: # BB#0:
2363; SANDY-NEXT: sfence # sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002364; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002365;
2366; HASWELL-LABEL: test_sfence:
2367; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002368; HASWELL-NEXT: sfence # sched: [1:0.33]
2369; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002370;
Gadi Haber767d98b2017-08-30 08:08:50 +00002371; SKYLAKE-LABEL: test_sfence:
2372; SKYLAKE: # BB#0:
2373; SKYLAKE-NEXT: sfence # sched: [1:0.33]
2374; SKYLAKE-NEXT: retq # sched: [2:1.00]
2375;
Simon Pilgrim93986492017-04-18 19:04:40 +00002376; BTVER2-LABEL: test_sfence:
2377; BTVER2: # BB#0:
2378; BTVER2-NEXT: sfence # sched: [1:1.00]
2379; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002380;
2381; ZNVER1-LABEL: test_sfence:
2382; ZNVER1: # BB#0:
2383; ZNVER1-NEXT: sfence # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002384; ZNVER1-NEXT: retq # sched: [1:0.50]
; The only operation in the function: the sfence intrinsic.
Simon Pilgrim93986492017-04-18 19:04:40 +00002385 call void @llvm.x86.sse.sfence()
2386 ret void
2387}
2388declare void @llvm.x86.sse.sfence() nounwind readnone
2389
; test_shufps: two chained shufflevector operations on <4 x float>.
; The first shuffle uses mask <0, 0, 4, 4> over %a0 and %a1; the second uses
; mask <0, 3, 4, 4> over that result and a vector loaded (align 16) from %a2.
; Each target block below expects a shufps/vshufps with register sources
; followed by one with a memory source ("mem[0,0]"). The expected lines use
; the {{.*#+}} regex where the printed operands are replaced by the decoded
; shuffle comment. The ATOM expectations additionally list four nop
; instructions ahead of retq. Unlike its neighbours, this function is marked
; nounwind.
2390define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
2391; GENERIC-LABEL: test_shufps:
2392; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002393; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2394; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2395; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002396;
2397; ATOM-LABEL: test_shufps:
2398; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002399; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2400; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2401; ATOM-NEXT: nop # sched: [1:0.50]
2402; ATOM-NEXT: nop # sched: [1:0.50]
2403; ATOM-NEXT: nop # sched: [1:0.50]
2404; ATOM-NEXT: nop # sched: [1:0.50]
2405; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002406;
2407; SLM-LABEL: test_shufps:
2408; SLM: # BB#0:
2409; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2410; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [4:1.00]
2411; SLM-NEXT: retq # sched: [4:1.00]
2412;
2413; SANDY-LABEL: test_shufps:
2414; SANDY: # BB#0:
2415; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002416; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2417; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002418;
2419; HASWELL-LABEL: test_shufps:
2420; HASWELL: # BB#0:
2421; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002422; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2423; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002424;
Gadi Haber767d98b2017-08-30 08:08:50 +00002425; SKYLAKE-LABEL: test_shufps:
2426; SKYLAKE: # BB#0:
2427; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2428; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2429; SKYLAKE-NEXT: retq # sched: [2:1.00]
2430;
Simon Pilgrim93986492017-04-18 19:04:40 +00002431; BTVER2-LABEL: test_shufps:
2432; BTVER2: # BB#0:
2433; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2434; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
2435; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002436;
2437; ZNVER1-LABEL: test_shufps:
2438; ZNVER1: # BB#0:
2439; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2440; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002441; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 0-3 pick from the first operand, 4-7 from the second.
Simon Pilgrim93986492017-04-18 19:04:40 +00002442 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
; Second shuffle takes its second operand from memory.
2443 %2 = load <4 x float>, <4 x float> *%a2, align 16
2444 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
2445 ret <4 x float> %3
2446}
2447
; test_sqrtps: packed square root via @llvm.x86.sse.sqrt.ps.
; The intrinsic is applied once to the register argument %a0 and once to a
; vector loaded (align 16) from %a1; the two results are then added so both
; calls stay live. Each target block below expects one sqrtps/vsqrtps with a
; register source, one with the (%rdi) memory source, and an addps/vaddps.
; The order of the two sqrt instructions differs between targets, and only
; the SLM expectations end with an extra movaps into %xmm0.
2448define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
2449; GENERIC-LABEL: test_sqrtps:
2450; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002451; GENERIC-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:1.00]
2452; GENERIC-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:1.00]
2453; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2454; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002455;
2456; ATOM-LABEL: test_sqrtps:
2457; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002458; ATOM-NEXT: sqrtps %xmm0, %xmm1 # sched: [70:35.00]
2459; ATOM-NEXT: sqrtps (%rdi), %xmm0 # sched: [70:35.00]
2460; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2461; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002462;
2463; SLM-LABEL: test_sqrtps:
2464; SLM: # BB#0:
2465; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [18:1.00]
2466; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [15:1.00]
2467; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2468; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2469; SLM-NEXT: retq # sched: [4:1.00]
2470;
2471; SANDY-LABEL: test_sqrtps:
2472; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002473; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
2474; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002475; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002476; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002477;
2478; HASWELL-LABEL: test_sqrtps:
2479; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002480; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
2481; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002482; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002483; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002484;
Gadi Haber767d98b2017-08-30 08:08:50 +00002485; SKYLAKE-LABEL: test_sqrtps:
2486; SKYLAKE: # BB#0:
2487; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
2488; SKYLAKE-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
2489; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2490; SKYLAKE-NEXT: retq # sched: [2:1.00]
2491;
Simon Pilgrim93986492017-04-18 19:04:40 +00002492; BTVER2-LABEL: test_sqrtps:
2493; BTVER2: # BB#0:
2494; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
2495; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00]
2496; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2497; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002498;
2499; ZNVER1-LABEL: test_sqrtps:
2500; ZNVER1: # BB#0:
2501; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:1.00]
2502; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:1.00]
2503; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002504; ZNVER1-NEXT: retq # sched: [1:0.50]
; Square root of the register argument.
Simon Pilgrim93986492017-04-18 19:04:40 +00002505 %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
; Square root of a value that comes from memory.
2506 %2 = load <4 x float>, <4 x float> *%a1, align 16
2507 %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2)
; Sum the two results so that both intrinsic calls feed the return value.
2508 %4 = fadd <4 x float> %1, %3
2509 ret <4 x float> %4
2510}
; Packed single-precision square root intrinsic (takes and returns <4 x float>).
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
2512
; TODO: add coverage for the memory-operand form of sqrtss (sqrtss_m). The test
; below loads the whole vector with a separate move, so only the
; register-to-register form appears in the expected output.

; Scheduling annotations for the scalar square root intrinsic: it is applied
; once to the register argument and once to a vector loaded from %a1, and the
; two results are added.
; The Atom expectations for the two sqrtss instructions carry no sched
; annotation.
; NOTE(review): the Sandy Bridge latency of 114 for vsqrtss looks like a
; scheduling-model typo for 14 (compare the other Intel targets) - confirm.
; These assertions are autogenerated, so any correction belongs in the model
; followed by regenerating the checks, not in a hand edit here.
define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_sqrtss:
; GENERIC: # BB#0:
; GENERIC-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:1.00]
; GENERIC-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
; GENERIC-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:1.00]
; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_sqrtss:
; ATOM: # BB#0:
; ATOM-NEXT: movaps (%rdi), %xmm1 # sched: [1:1.00]
; ATOM-NEXT: sqrtss %xmm0, %xmm0
; ATOM-NEXT: sqrtss %xmm1, %xmm1
; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_sqrtss:
; SLM: # BB#0:
; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00]
; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [18:1.00]
; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [18:1.00]
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_sqrtss:
; SANDY: # BB#0:
; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_sqrtss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_sqrtss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
; SKYLAKE-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
; SKYLAKE-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_sqrtss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [26:21.00]
; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [26:21.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_sqrtss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [27:1.00]
; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [27:1.00]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
  %2 = load <4 x float>, <4 x float> *%a1, align 16
  %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2)
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
2586
; Scheduling annotations for the stmxcsr intrinsic: the call writes through a
; pointer to a 4-byte stack slot, and that slot is then reloaded and returned
; as an i32. The stack offset is matched with a regex because it is not fixed.
define i32 @test_stmxcsr() {
; GENERIC-LABEL: test_stmxcsr:
; GENERIC: # BB#0:
; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_stmxcsr:
; ATOM: # BB#0:
; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [15:7.50]
; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_stmxcsr:
; SLM: # BB#0:
; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_stmxcsr:
; SANDY: # BB#0:
; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_stmxcsr:
; HASWELL: # BB#0:
; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_stmxcsr:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_stmxcsr:
; BTVER2: # BB#0:
; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_stmxcsr:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
; ZNVER1-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  ret i32 %3
}
; NOTE(review): this declaration is marked readnone, yet the expected output
; above shows the instruction storing to the stack slot that is reloaded right
; after. The attribute looks wrong for a storing intrinsic - confirm, and drop
; it if so.
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone
2642
; Scheduling annotations for packed single-precision subtract: one fsub with a
; register operand (%a1) followed by one fsub whose right-hand operand is a
; 16-byte-aligned load from %a2, which the expected output shows folded into
; the instruction.
define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_subps:
; GENERIC: # BB#0:
; GENERIC-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_subps:
; ATOM: # BB#0:
; ATOM-NEXT: subps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: subps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_subps:
; SLM: # BB#0:
; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_subps:
; SANDY: # BB#0:
; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_subps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_subps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_subps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_subps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fsub <4 x float> %a0, %a1
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = fsub <4 x float> %1, %2
  ret <4 x float> %3
}
2696
; Scheduling annotations for scalar single-precision subtract: one fsub with a
; register operand (%a1) followed by one fsub whose right-hand operand is a
; float loaded from %a2, which the expected output shows folded into the
; instruction.
define float @test_subss(float %a0, float %a1, float *%a2) {
; GENERIC-LABEL: test_subss:
; GENERIC: # BB#0:
; GENERIC-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_subss:
; ATOM: # BB#0:
; ATOM-NEXT: subss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: subss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_subss:
; SLM: # BB#0:
; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_subss:
; SANDY: # BB#0:
; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_subss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_subss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_subss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_subss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fsub float %a0, %a1
  %2 = load float, float *%a2, align 4
  %3 = fsub float %1, %2
  ret float %3
}
2750
; Scheduling annotations for the ucomieq.ss intrinsic: %a0 is compared once
; against the register argument %a1 and once against a vector loaded from %a2,
; and the two i32 results are OR-ed together. In the expected output each
; comparison is followed by a setnp/sete/andb sequence, and the second
; comparison takes its operand directly from memory.
define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_ucomiss:
; GENERIC: # BB#0:
; GENERIC-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: setnp %al # sched: [1:0.50]
; GENERIC-NEXT: sete %cl # sched: [1:0.50]
; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
; GENERIC-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: setnp %al # sched: [1:0.50]
; GENERIC-NEXT: sete %dl # sched: [1:0.50]
; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_ucomiss:
; ATOM: # BB#0:
; ATOM-NEXT: ucomiss %xmm1, %xmm0 # sched: [9:4.50]
; ATOM-NEXT: setnp %al # sched: [1:0.50]
; ATOM-NEXT: sete %cl # sched: [1:0.50]
; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
; ATOM-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:5.00]
; ATOM-NEXT: setnp %al # sched: [1:0.50]
; ATOM-NEXT: sete %dl # sched: [1:0.50]
; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_ucomiss:
; SLM: # BB#0:
; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: setnp %al # sched: [1:0.50]
; SLM-NEXT: sete %cl # sched: [1:0.50]
; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: setnp %al # sched: [1:0.50]
; SLM-NEXT: sete %dl # sched: [1:0.50]
; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_ucomiss:
; SANDY: # BB#0:
; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.50]
; SANDY-NEXT: sete %cl # sched: [1:0.50]
; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.50]
; SANDY-NEXT: sete %dl # sched: [1:0.50]
; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_ucomiss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: setnp %al # sched: [1:0.50]
; HASWELL-NEXT: sete %cl # sched: [1:0.50]
; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: setnp %al # sched: [1:0.50]
; HASWELL-NEXT: sete %dl # sched: [1:0.50]
; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_ucomiss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
; SKYLAKE-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ucomiss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: setnp %al # sched: [1:0.50]
; BTVER2-NEXT: sete %cl # sched: [1:0.50]
; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: setnp %al # sched: [1:0.50]
; BTVER2-NEXT: sete %dl # sched: [1:0.50]
; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ucomiss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
; ZNVER1-NEXT: vucomiss (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 4
  %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
  %4 = or i32 %1, %3
  ret i32 %4
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
2870
; Scheduling annotations for the high-half interleave (unpckhps): the shuffle
; mask <2, 6, 3, 7> is applied first to two register operands and then to the
; result and a vector loaded from %a2. The shuffle comment emitted by the
; assembly printer is matched with a regex ahead of the lane description.
; The Atom expectations include four nops ahead of the return.
define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_unpckhps:
; GENERIC: # BB#0:
; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_unpckhps:
; ATOM: # BB#0:
; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_unpckhps:
; SLM: # BB#0:
; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_unpckhps:
; SANDY: # BB#0:
; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_unpckhps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_unpckhps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_unpckhps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ret <4 x float> %3
}
2928
; Scheduling annotations for the low-half interleave (unpcklps): the shuffle
; mask <0, 4, 1, 5> is applied first to two register operands and then to the
; result and a vector loaded from %a2. The shuffle comment emitted by the
; assembly printer is matched with a regex ahead of the lane description.
; The Atom expectations include four nops ahead of the return.
define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_unpcklps:
; GENERIC: # BB#0:
; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_unpcklps:
; ATOM: # BB#0:
; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_unpcklps:
; SLM: # BB#0:
; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_unpcklps:
; SANDY: # BB#0:
; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_unpcklps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_unpcklps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_unpcklps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x float> %3
}
2986
; Scheduling annotations for the packed bitwise XOR (xorps): the float vectors
; are bitcast to <4 x i32>, XOR-ed once with a register operand and once with a
; vector loaded from %a2, and the result is bitcast back to <4 x float>.
; The Atom expectations include four nops ahead of the return.
define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_xorps:
; GENERIC: # BB#0:
; GENERIC-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_xorps:
; ATOM: # BB#0:
; ATOM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: xorps (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_xorps:
; SLM: # BB#0:
; SLM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_xorps:
; SANDY: # BB#0:
; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_xorps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_xorps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_xorps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_xorps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = bitcast <4 x float> %a0 to <4 x i32>
  %2 = bitcast <4 x float> %a1 to <4 x i32>
  %3 = xor <4 x i32> %1, %2
  %4 = load <4 x float>, <4 x float> *%a2, align 16
  %5 = bitcast <4 x float> %4 to <4 x i32>
  %6 = xor <4 x i32> %3, %5
  %7 = bitcast <4 x i32> %6 to <4 x float>
  ret <4 x float> %7
}
3048
; Metadata node holding the constant i32 1.
; NOTE(review): nothing in this part of the file refers to it; presumably it is
; attached to instructions in earlier tests - confirm before removing.
!0 = !{i32 1}