; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

; Adds two <4 x float> arguments, then adds the <4 x float> loaded from %a2
; (16-byte aligned) to that sum. The check lines below list the expected
; assembly and its "# sched" annotation for each check prefix.
define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_addps:
; GENERIC: # BB#0:
; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addps:
; ATOM: # BB#0:
; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: addps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_addps:
; SLM: # BB#0:
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_addps:
; SANDY: # BB#0:
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_addps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_addps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: test_addps:
; SKX: # BB#0:
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_addps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_addps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fadd <4 x float> %a0, %a1
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = fadd <4 x float> %1, %2
  ret <4 x float> %3
}

; Adds two scalar float arguments, then adds the float loaded from %a2
; (4-byte aligned) to that sum. The check lines below list the expected
; assembly and its "# sched" annotation for each check prefix.
define float @test_addss(float %a0, float %a1, float *%a2) {
; GENERIC-LABEL: test_addss:
; GENERIC: # BB#0:
; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addss:
; ATOM: # BB#0:
; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: addss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_addss:
; SLM: # BB#0:
; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_addss:
; SANDY: # BB#0:
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_addss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_addss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: test_addss:
; SKX: # BB#0:
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_addss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_addss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fadd float %a0, %a1
  %2 = load float, float *%a2, align 4
  %3 = fadd float %1, %2
  ret float %3
}

; Bitwise AND on <4 x float> values, expressed through <4 x i32> bitcasts.
; The two arguments are ANDed first, then the result is ANDed with the vector
; loaded from %a2 (16-byte aligned). The check lines below list the expected
; assembly and its "# sched" annotation for each check prefix.
define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_andps:
; GENERIC: # BB#0:
; GENERIC-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_andps:
; ATOM: # BB#0:
; ATOM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: andps (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_andps:
; SLM: # BB#0:
; SLM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_andps:
; SANDY: # BB#0:
; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_andps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_andps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: test_andps:
; SKX: # BB#0:
; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_andps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = bitcast <4 x float> %a0 to <4 x i32>
  %2 = bitcast <4 x float> %a1 to <4 x i32>
  %3 = and <4 x i32> %1, %2
  %4 = load <4 x float>, <4 x float> *%a2, align 16
  %5 = bitcast <4 x float> %4 to <4 x i32>
  %6 = and <4 x i32> %3, %5
  %7 = bitcast <4 x i32> %6 to <4 x float>
  ret <4 x float> %7
}

; AND-NOT on <4 x float> values, expressed through <4 x i32> bitcasts.
; Computes (~%a0 & %a1), then inverts that result and ANDs it with the vector
; loaded from %a2 (16-byte aligned). The check lines below list the expected
; assembly and its "# sched" annotation for each check prefix.
define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_andnotps:
; GENERIC: # BB#0:
; GENERIC-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_andnotps:
; ATOM: # BB#0:
; ATOM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: andnps (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_andnotps:
; SLM: # BB#0:
; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_andnotps:
; SANDY: # BB#0:
; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_andnotps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_andnotps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: test_andnotps:
; SKX: # BB#0:
; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andnotps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_andnotps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = bitcast <4 x float> %a0 to <4 x i32>
  %2 = bitcast <4 x float> %a1 to <4 x i32>
  %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
  %4 = and <4 x i32> %3, %2
  %5 = load <4 x float>, <4 x float> *%a2, align 16
  %6 = bitcast <4 x float> %5 to <4 x i32>
  %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
  %8 = and <4 x i32> %6, %7
  %9 = bitcast <4 x i32> %8 to <4 x float>
  ret <4 x float> %9
}

; Ordered-equal compare of %a0 against %a1 and against the vector loaded from
; %a2 (16-byte aligned). The two <4 x i1> results are ORed, sign-extended to
; <4 x i32> and bitcast to <4 x float>. The check lines below list the
; expected assembly and its "# sched" annotation for each check prefix.
define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_cmpps:
; GENERIC: # BB#0:
; GENERIC-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cmpps:
; ATOM: # BB#0:
; ATOM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [5:5.00]
; ATOM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmpps:
; SLM: # BB#0:
; SLM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cmpps:
; SANDY: # BB#0:
; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cmpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_cmpps:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
; SKYLAKE-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: test_cmpps:
; SKX: # BB#0:
; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %k1 # sched: [9:1.00]
; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cmpps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; ZNVER1-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fcmp oeq <4 x float> %a0, %a1
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = fcmp oeq <4 x float> %a0, %2
  %4 = or <4 x i1> %1, %3
  %5 = sext <4 x i1> %4 to <4 x i32>
  %6 = bitcast <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

; Calls the llvm.x86.sse.cmp.ss intrinsic twice with immediate 0. The first
; call compares %a0 and %a1 (each inserted into lane 0 of an undef vector);
; the second compares that result with the float loaded from %a2. Lane 0 of
; the final vector is returned. The check lines below list the expected
; assembly and its "# sched" annotation for each check prefix.
define float @test_cmpss(float %a0, float %a1, float *%a2) {
; GENERIC-LABEL: test_cmpss:
; GENERIC: # BB#0:
; GENERIC-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cmpss:
; ATOM: # BB#0:
; ATOM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmpss:
; SLM: # BB#0:
; SLM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cmpss:
; SANDY: # BB#0:
; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cmpss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_cmpss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: test_cmpss:
; SKX: # BB#0:
; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cmpss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cmpss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = insertelement <4 x float> undef, float %a0, i32 0
  %2 = insertelement <4 x float> undef, float %a1, i32 0
  %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0)
  %4 = load float, float *%a2, align 4
  %5 = insertelement <4 x float> undef, float %4, i32 0
  %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0)
  %7 = extractelement <4 x float> %6, i32 0
  ret float %7
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; Calls the llvm.x86.sse.comieq.ss intrinsic twice, comparing %a0 with %a1
; and %a0 with the <4 x float> loaded from %a2 (4-byte aligned), then ORs the
; two i32 results. The check lines below list the expected assembly and its
; "# sched" annotation for each check prefix.
define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_comiss:
; GENERIC: # BB#0:
; GENERIC-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: setnp %al # sched: [1:0.50]
; GENERIC-NEXT: sete %cl # sched: [1:0.50]
; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
; GENERIC-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: setnp %al # sched: [1:0.50]
; GENERIC-NEXT: sete %dl # sched: [1:0.50]
; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_comiss:
; ATOM: # BB#0:
; ATOM-NEXT: comiss %xmm1, %xmm0 # sched: [9:4.50]
; ATOM-NEXT: setnp %al # sched: [1:0.50]
; ATOM-NEXT: sete %cl # sched: [1:0.50]
; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
; ATOM-NEXT: comiss (%rdi), %xmm0 # sched: [10:5.00]
; ATOM-NEXT: setnp %al # sched: [1:0.50]
; ATOM-NEXT: sete %dl # sched: [1:0.50]
; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_comiss:
; SLM: # BB#0:
; SLM-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: setnp %al # sched: [1:0.50]
; SLM-NEXT: sete %cl # sched: [1:0.50]
; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
; SLM-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: setnp %al # sched: [1:0.50]
; SLM-NEXT: sete %dl # sched: [1:0.50]
; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_comiss:
; SANDY: # BB#0:
; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.50]
; SANDY-NEXT: sete %cl # sched: [1:0.50]
; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.50]
; SANDY-NEXT: sete %dl # sched: [1:0.50]
; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_comiss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: setnp %al # sched: [1:0.50]
; HASWELL-NEXT: sete %cl # sched: [1:0.50]
; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: setnp %al # sched: [1:0.50]
; HASWELL-NEXT: sete %dl # sched: [1:0.50]
; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_comiss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: setnp %al # sched: [1:1.00]
; SKYLAKE-NEXT: sete %cl # sched: [1:1.00]
; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
; SKYLAKE-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
; SKYLAKE-NEXT: setnp %al # sched: [1:1.00]
; SKYLAKE-NEXT: sete %dl # sched: [1:1.00]
; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: test_comiss:
; SKX: # BB#0:
; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: setnp %al # sched: [1:0.50]
; SKX-NEXT: sete %cl # sched: [1:0.50]
; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
; SKX-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
; SKX-NEXT: setnp %al # sched: [1:0.50]
; SKX-NEXT: sete %dl # sched: [1:0.50]
; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_comiss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: setnp %al # sched: [1:0.50]
; BTVER2-NEXT: sete %cl # sched: [1:0.50]
; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
; BTVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: setnp %al # sched: [1:0.50]
; BTVER2-NEXT: sete %dl # sched: [1:0.50]
; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_comiss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
; ZNVER1-NEXT: vcomiss (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 4
  %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2)
  %4 = or i32 %1, %3
  ret i32 %4
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

; Converts the i32 argument and the i32 loaded from %a1 (4-byte aligned) to
; float with sitofp, then adds the two converted values. The check lines
; below list the expected assembly and its "# sched" annotation for each
; check prefix.
define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
; GENERIC-LABEL: test_cvtsi2ss:
; GENERIC: # BB#0:
; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtsi2ss:
; ATOM: # BB#0:
; ATOM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [6:3.00]
; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtsi2ss:
; SLM: # BB#0:
; SLM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:0.50]
; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtsi2ss:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtsi2ss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_cvtsi2ss:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: test_cvtsi2ss:
; SKX: # BB#0:
; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtsi2ss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtsi2ss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = sitofp i32 %a0 to float
  %2 = load i32, i32 *%a1, align 4
  %3 = sitofp i32 %2 to float
  %4 = fadd float %1, %3
  ret float %4
}

; test_cvtsi2ssq: signed i64 -> float conversion (sitofp), once from the
; register argument %a0 and once from a value loaded through %a1. Both
; results feed the returned fadd.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file). For each -mcpu run line they pin the selected instructions and
; the `# sched: [...]` annotation printed under -print-schedule.
613define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
614; GENERIC-LABEL: test_cvtsi2ssq:
615; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000616; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
617; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
618; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
619; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000620;
621; ATOM-LABEL: test_cvtsi2ssq:
622; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000623; ATOM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:3.50]
624; ATOM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:3.00]
625; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
626; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000627;
628; SLM-LABEL: test_cvtsi2ssq:
629; SLM: # BB#0:
630; SLM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00]
631; SLM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50]
632; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
633; SLM-NEXT: retq # sched: [4:1.00]
634;
635; SANDY-LABEL: test_cvtsi2ssq:
636; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000637; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
638; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000639; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000640; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000641;
642; HASWELL-LABEL: test_cvtsi2ssq:
643; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +0000644; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000645; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
646; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000647; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000648;
Gadi Haber767d98b2017-08-30 08:08:50 +0000649; SKYLAKE-LABEL: test_cvtsi2ssq:
650; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000651; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
652; SKYLAKE-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
653; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +0000654; SKYLAKE-NEXT: retq # sched: [2:1.00]
655;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000656; SKX-LABEL: test_cvtsi2ssq:
657; SKX: # BB#0:
658; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
659; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000660; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
661; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000662;
Simon Pilgrim93986492017-04-18 19:04:40 +0000663; BTVER2-LABEL: test_cvtsi2ssq:
664; BTVER2: # BB#0:
665; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
666; BTVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
667; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
668; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000669;
670; ZNVER1-LABEL: test_cvtsi2ssq:
671; ZNVER1: # BB#0:
672; ZNVER1-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
673; ZNVER1-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
674; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000675; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 converts the register argument, %3 converts the value
; loaded from %a1 (8-byte aligned), %4 combines them.
Simon Pilgrim93986492017-04-18 19:04:40 +0000676 %1 = sitofp i64 %a0 to float
677 %2 = load i64, i64 *%a1, align 8
678 %3 = sitofp i64 %2 to float
679 %4 = fadd float %1, %3
680 ret float %4
681}
682
; test_cvtss2si: float -> i32 conversion through the
; @llvm.x86.sse.cvtss2si intrinsic, once on the register argument %a0 and
; once on a float loaded through %a1. The two i32 results are added and
; returned.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file). For each -mcpu run line they pin the selected instructions and
; the `# sched: [...]` annotation printed under -print-schedule.
; NOTE(review): for the HASWELL and SKYLAKE prefixes the memory-operand form
; has the same first sched field as the register form, unlike the other
; CPUs. This is recorded model output, not a hand-written expectation;
; confirm it is intended.
683define i32 @test_cvtss2si(float %a0, float *%a1) {
684; GENERIC-LABEL: test_cvtss2si:
685; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000686; GENERIC-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
687; GENERIC-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
688; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
689; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000690;
691; ATOM-LABEL: test_cvtss2si:
692; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000693; ATOM-NEXT: cvtss2si (%rdi), %eax # sched: [9:4.50]
694; ATOM-NEXT: cvtss2si %xmm0, %ecx # sched: [8:4.00]
695; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
696; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000697;
698; SLM-LABEL: test_cvtss2si:
699; SLM: # BB#0:
700; SLM-NEXT: cvtss2si (%rdi), %eax # sched: [7:1.00]
701; SLM-NEXT: cvtss2si %xmm0, %ecx # sched: [4:0.50]
702; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
703; SLM-NEXT: retq # sched: [4:1.00]
704;
705; SANDY-LABEL: test_cvtss2si:
706; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000707; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
708; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000709; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000710; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000711;
712; HASWELL-LABEL: test_cvtss2si:
713; HASWELL: # BB#0:
714; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000715; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000716; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000717; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000718;
Gadi Haber767d98b2017-08-30 08:08:50 +0000719; SKYLAKE-LABEL: test_cvtss2si:
720; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000721; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
722; SKYLAKE-NEXT: vcvtss2si (%rdi), %eax # sched: [6:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000723; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
724; SKYLAKE-NEXT: retq # sched: [2:1.00]
725;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000726; SKX-LABEL: test_cvtss2si:
727; SKX: # BB#0:
728; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000729; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000730; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +0000731; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000732;
Simon Pilgrim93986492017-04-18 19:04:40 +0000733; BTVER2-LABEL: test_cvtss2si:
734; BTVER2: # BB#0:
735; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
736; BTVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00]
737; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
738; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000739;
740; ZNVER1-LABEL: test_cvtss2si:
741; ZNVER1: # BB#0:
742; ZNVER1-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00]
743; ZNVER1-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
744; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000745; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the intrinsic takes a <4 x float>, so each scalar is first
; inserted into lane 0 of an undef vector before the call.
Simon Pilgrim93986492017-04-18 19:04:40 +0000746 %1 = insertelement <4 x float> undef, float %a0, i32 0
747 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1)
748 %3 = load float, float *%a1, align 4
749 %4 = insertelement <4 x float> undef, float %3, i32 0
750 %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4)
751 %6 = add i32 %2, %5
752 ret i32 %6
753}
; Declaration of the intrinsic called twice by test_cvtss2si above.
754declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
755
; test_cvtss2siq: float -> i64 conversion through the
; @llvm.x86.sse.cvtss2si64 intrinsic, once on the register argument %a0 and
; once on a float loaded through %a1. The two i64 results are added and
; returned.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file). For each -mcpu run line they pin the selected instructions
; (cvtss2si / vcvtss2si writing 64-bit registers) and the `# sched: [...]`
; annotation printed under -print-schedule.
; NOTE(review): for the HASWELL and SKYLAKE prefixes the memory-operand form
; has the same first sched field as the register form, unlike the other
; CPUs. This is recorded model output; confirm it is intended.
756define i64 @test_cvtss2siq(float %a0, float *%a1) {
757; GENERIC-LABEL: test_cvtss2siq:
758; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000759; GENERIC-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
760; GENERIC-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
761; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
762; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000763;
764; ATOM-LABEL: test_cvtss2siq:
765; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000766; ATOM-NEXT: cvtss2si (%rdi), %rax # sched: [10:5.00]
767; ATOM-NEXT: cvtss2si %xmm0, %rcx # sched: [9:4.50]
768; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
769; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000770;
771; SLM-LABEL: test_cvtss2siq:
772; SLM: # BB#0:
773; SLM-NEXT: cvtss2si (%rdi), %rax # sched: [7:1.00]
774; SLM-NEXT: cvtss2si %xmm0, %rcx # sched: [4:0.50]
775; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
776; SLM-NEXT: retq # sched: [4:1.00]
777;
778; SANDY-LABEL: test_cvtss2siq:
779; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000780; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
781; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000782; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000783; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000784;
785; HASWELL-LABEL: test_cvtss2siq:
786; HASWELL: # BB#0:
787; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000788; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000789; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000790; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000791;
Gadi Haber767d98b2017-08-30 08:08:50 +0000792; SKYLAKE-LABEL: test_cvtss2siq:
793; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000794; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
795; SKYLAKE-NEXT: vcvtss2si (%rdi), %rax # sched: [6:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000796; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
797; SKYLAKE-NEXT: retq # sched: [2:1.00]
798;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000799; SKX-LABEL: test_cvtss2siq:
800; SKX: # BB#0:
801; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000802; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000803; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +0000804; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000805;
Simon Pilgrim93986492017-04-18 19:04:40 +0000806; BTVER2-LABEL: test_cvtss2siq:
807; BTVER2: # BB#0:
808; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
809; BTVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00]
810; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
811; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000812;
813; ZNVER1-LABEL: test_cvtss2siq:
814; ZNVER1: # BB#0:
815; ZNVER1-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00]
816; ZNVER1-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
817; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000818; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the intrinsic takes a <4 x float>, so each scalar is first
; inserted into lane 0 of an undef vector before the call.
Simon Pilgrim93986492017-04-18 19:04:40 +0000819 %1 = insertelement <4 x float> undef, float %a0, i32 0
820 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1)
821 %3 = load float, float *%a1, align 4
822 %4 = insertelement <4 x float> undef, float %3, i32 0
823 %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4)
824 %6 = add i64 %2, %5
825 ret i64 %6
826}
; Declaration of the intrinsic called twice by test_cvtss2siq above.
827declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
828
; test_cvttss2si: float -> i32 conversion written as plain `fptosi` (no
; intrinsic), once on the register argument %a0 and once on a float loaded
; through %a1. The two i32 results are added and returned.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file). For each -mcpu run line they pin the selected instructions
; (cvttss2si / vcvttss2si) and the `# sched: [...]` annotation printed under
; -print-schedule.
; NOTE(review): for HASWELL the memory-operand form has the same first sched
; field as the register form, and for SKYLAKE it is lower (6 vs 7). This is
; recorded model output; confirm it is intended.
829define i32 @test_cvttss2si(float %a0, float *%a1) {
830; GENERIC-LABEL: test_cvttss2si:
831; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000832; GENERIC-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
833; GENERIC-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
834; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
835; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000836;
837; ATOM-LABEL: test_cvttss2si:
838; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000839; ATOM-NEXT: cvttss2si (%rdi), %eax # sched: [9:4.50]
840; ATOM-NEXT: cvttss2si %xmm0, %ecx # sched: [8:4.00]
841; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
842; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000843;
844; SLM-LABEL: test_cvttss2si:
845; SLM: # BB#0:
846; SLM-NEXT: cvttss2si (%rdi), %eax # sched: [7:1.00]
847; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50]
848; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
849; SLM-NEXT: retq # sched: [4:1.00]
850;
851; SANDY-LABEL: test_cvttss2si:
852; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000853; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
854; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000855; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000856; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000857;
858; HASWELL-LABEL: test_cvttss2si:
859; HASWELL: # BB#0:
860; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000861; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000862; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000863; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000864;
Gadi Haber767d98b2017-08-30 08:08:50 +0000865; SKYLAKE-LABEL: test_cvttss2si:
866; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000867; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
868; SKYLAKE-NEXT: vcvttss2si (%rdi), %eax # sched: [6:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000869; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
870; SKYLAKE-NEXT: retq # sched: [2:1.00]
871;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000872; SKX-LABEL: test_cvttss2si:
873; SKX: # BB#0:
874; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000875; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000876; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +0000877; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000878;
Simon Pilgrim93986492017-04-18 19:04:40 +0000879; BTVER2-LABEL: test_cvttss2si:
880; BTVER2: # BB#0:
881; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
882; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
883; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
884; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000885;
886; ZNVER1-LABEL: test_cvttss2si:
887; ZNVER1: # BB#0:
888; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00]
889; ZNVER1-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
890; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000891; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 converts the register argument, %3 converts the float
; loaded from %a1, %4 combines them.
Simon Pilgrim93986492017-04-18 19:04:40 +0000892 %1 = fptosi float %a0 to i32
893 %2 = load float, float *%a1, align 4
894 %3 = fptosi float %2 to i32
895 %4 = add i32 %1, %3
896 ret i32 %4
897}
898
; test_cvttss2siq: float -> i64 conversion written as plain `fptosi` (no
; intrinsic), once on the register argument %a0 and once on a float loaded
; through %a1. The two i64 results are added and returned.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file). For each -mcpu run line they pin the selected instructions
; (cvttss2si / vcvttss2si writing 64-bit registers) and the `# sched: [...]`
; annotation printed under -print-schedule.
; NOTE(review): for HASWELL the memory-operand form has the same first sched
; field as the register form, and for SKYLAKE it is lower (6 vs 7). This is
; recorded model output; confirm it is intended.
899define i64 @test_cvttss2siq(float %a0, float *%a1) {
900; GENERIC-LABEL: test_cvttss2siq:
901; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000902; GENERIC-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
903; GENERIC-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
904; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
905; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000906;
907; ATOM-LABEL: test_cvttss2siq:
908; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000909; ATOM-NEXT: cvttss2si (%rdi), %rax # sched: [10:5.00]
910; ATOM-NEXT: cvttss2si %xmm0, %rcx # sched: [9:4.50]
911; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
912; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000913;
914; SLM-LABEL: test_cvttss2siq:
915; SLM: # BB#0:
916; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00]
917; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50]
918; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
919; SLM-NEXT: retq # sched: [4:1.00]
920;
921; SANDY-LABEL: test_cvttss2siq:
922; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000923; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
924; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000925; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000926; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000927;
928; HASWELL-LABEL: test_cvttss2siq:
929; HASWELL: # BB#0:
930; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000931; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000932; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000933; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000934;
Gadi Haber767d98b2017-08-30 08:08:50 +0000935; SKYLAKE-LABEL: test_cvttss2siq:
936; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000937; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
938; SKYLAKE-NEXT: vcvttss2si (%rdi), %rax # sched: [6:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000939; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
940; SKYLAKE-NEXT: retq # sched: [2:1.00]
941;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000942; SKX-LABEL: test_cvttss2siq:
943; SKX: # BB#0:
944; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000945; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000946; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +0000947; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000948;
Simon Pilgrim93986492017-04-18 19:04:40 +0000949; BTVER2-LABEL: test_cvttss2siq:
950; BTVER2: # BB#0:
951; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
952; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
953; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
954; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000955;
956; ZNVER1-LABEL: test_cvttss2siq:
957; ZNVER1: # BB#0:
958; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00]
959; ZNVER1-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
960; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000961; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 converts the register argument, %3 converts the float
; loaded from %a1, %4 combines them.
Simon Pilgrim93986492017-04-18 19:04:40 +0000962 %1 = fptosi float %a0 to i64
963 %2 = load float, float *%a1, align 4
964 %3 = fptosi float %2 to i64
965 %4 = add i64 %1, %3
966 ret i64 %4
967}
968
; test_divps: packed <4 x float> division. %a0 is divided by the register
; operand %a1, and the quotient is then divided by a vector loaded through
; %a2, so both the register and the memory form of divps / vdivps appear.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file). For each -mcpu run line they pin the selected instructions and
; the `# sched: [...]` annotation printed under -print-schedule.
; NOTE(review): for the HASWELL and SKYLAKE prefixes the memory-operand form
; has the same sched annotation as the register form, unlike the other CPUs.
; This is recorded model output; confirm it is intended.
969define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
970; GENERIC-LABEL: test_divps:
971; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000972; GENERIC-NEXT: divps %xmm1, %xmm0 # sched: [14:1.00]
973; GENERIC-NEXT: divps (%rdi), %xmm0 # sched: [20:1.00]
974; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000975;
976; ATOM-LABEL: test_divps:
977; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000978; ATOM-NEXT: divps %xmm1, %xmm0 # sched: [70:35.00]
979; ATOM-NEXT: divps (%rdi), %xmm0 # sched: [125:62.50]
980; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000981;
982; SLM-LABEL: test_divps:
983; SLM: # BB#0:
984; SLM-NEXT: divps %xmm1, %xmm0 # sched: [34:34.00]
985; SLM-NEXT: divps (%rdi), %xmm0 # sched: [37:34.00]
986; SLM-NEXT: retq # sched: [4:1.00]
987;
988; SANDY-LABEL: test_divps:
989; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000990; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
991; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
992; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000993;
994; HASWELL-LABEL: test_divps:
995; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +0000996; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
997; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
998; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000999;
Gadi Haber767d98b2017-08-30 08:08:50 +00001000; SKYLAKE-LABEL: test_divps:
1001; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001002; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1003; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001004; SKYLAKE-NEXT: retq # sched: [2:1.00]
1005;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001006; SKX-LABEL: test_divps:
1007; SKX: # BB#0:
1008; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001009; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
1010; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001011;
Simon Pilgrim93986492017-04-18 19:04:40 +00001012; BTVER2-LABEL: test_divps:
1013; BTVER2: # BB#0:
1014; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
1015; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
1016; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001017;
1018; ZNVER1-LABEL: test_divps:
1019; ZNVER1: # BB#0:
1020; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
1021; ZNVER1-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001022; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the second fdiv consumes the first, so the two divisions
; form a dependent chain; the load is declared 16-byte aligned.
Simon Pilgrim93986492017-04-18 19:04:40 +00001023 %1 = fdiv <4 x float> %a0, %a1
1024 %2 = load <4 x float>, <4 x float> *%a2, align 16
1025 %3 = fdiv <4 x float> %1, %2
1026 ret <4 x float> %3
1027}
1028
; test_divss: scalar float division. %a0 is divided by the register operand
; %a1, and the quotient is then divided by a float loaded through %a2, so
; both the register and the memory form of divss / vdivss appear.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file). For each -mcpu run line they pin the selected instructions and
; the `# sched: [...]` annotation printed under -print-schedule.
; NOTE(review): for the HASWELL and SKYLAKE prefixes the memory-operand form
; has the same sched annotation as the register form, unlike the other CPUs.
; This is recorded model output; confirm it is intended.
1029define float @test_divss(float %a0, float %a1, float *%a2) {
1030; GENERIC-LABEL: test_divss:
1031; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001032; GENERIC-NEXT: divss %xmm1, %xmm0 # sched: [14:1.00]
1033; GENERIC-NEXT: divss (%rdi), %xmm0 # sched: [20:1.00]
1034; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001035;
1036; ATOM-LABEL: test_divss:
1037; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001038; ATOM-NEXT: divss %xmm1, %xmm0 # sched: [34:17.00]
1039; ATOM-NEXT: divss (%rdi), %xmm0 # sched: [62:31.00]
1040; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001041;
1042; SLM-LABEL: test_divss:
1043; SLM: # BB#0:
1044; SLM-NEXT: divss %xmm1, %xmm0 # sched: [34:34.00]
1045; SLM-NEXT: divss (%rdi), %xmm0 # sched: [37:34.00]
1046; SLM-NEXT: retq # sched: [4:1.00]
1047;
1048; SANDY-LABEL: test_divss:
1049; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001050; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
1051; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
1052; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001053;
1054; HASWELL-LABEL: test_divss:
1055; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001056; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
1057; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
1058; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001059;
Gadi Haber767d98b2017-08-30 08:08:50 +00001060; SKYLAKE-LABEL: test_divss:
1061; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001062; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1063; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001064; SKYLAKE-NEXT: retq # sched: [2:1.00]
1065;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001066; SKX-LABEL: test_divss:
1067; SKX: # BB#0:
1068; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001069; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
1070; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001071;
Simon Pilgrim93986492017-04-18 19:04:40 +00001072; BTVER2-LABEL: test_divss:
1073; BTVER2: # BB#0:
1074; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
1075; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
1076; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001077;
1078; ZNVER1-LABEL: test_divss:
1079; ZNVER1: # BB#0:
1080; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
1081; ZNVER1-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001082; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the second fdiv consumes the first, so the two divisions
; form a dependent chain.
Simon Pilgrim93986492017-04-18 19:04:40 +00001083 %1 = fdiv float %a0, %a1
1084 %2 = load float, float *%a2, align 4
1085 %3 = fdiv float %1, %2
1086 ret float %3
1087}
1088
; test_ldmxcsr: stores the i32 argument %a0 into a stack slot and passes the
; slot's address to the @llvm.x86.sse.ldmxcsr intrinsic.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file). For each -mcpu run line they expect a movl of %edi to a
; negative %rsp offset followed by ldmxcsr / vldmxcsr reading a negative
; %rsp offset, each with its `# sched: [...]` annotation printed under
; -print-schedule. The offsets are matched by the `{{[0-9]+}}` regex, so the
; exact slot is not pinned.
; NOTE(review): the ZNVER1 expectation for vldmxcsr is `[100:?]`, the only
; annotation here without a numeric second field. It is presumably a
; placeholder in that CPU's model; confirm.
1089define void @test_ldmxcsr(i32 %a0) {
1090; GENERIC-LABEL: test_ldmxcsr:
1091; GENERIC: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00001092; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
Simon Pilgrim84846982017-08-01 15:14:35 +00001093; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1094; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001095;
1096; ATOM-LABEL: test_ldmxcsr:
1097; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001098; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1099; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:2.50]
1100; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001101;
1102; SLM-LABEL: test_ldmxcsr:
1103; SLM: # BB#0:
1104; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1105; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
1106; SLM-NEXT: retq # sched: [4:1.00]
1107;
1108; SANDY-LABEL: test_ldmxcsr:
1109; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00001110; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001111; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1112; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001113;
1114; HASWELL-LABEL: test_ldmxcsr:
1115; HASWELL: # BB#0:
1116; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001117; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
1118; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001119;
Gadi Haber767d98b2017-08-30 08:08:50 +00001120; SKYLAKE-LABEL: test_ldmxcsr:
1121; SKYLAKE: # BB#0:
1122; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1123; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
1124; SKYLAKE-NEXT: retq # sched: [2:1.00]
1125;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001126; SKX-LABEL: test_ldmxcsr:
1127; SKX: # BB#0:
1128; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001129; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
1130; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001131;
Simon Pilgrim93986492017-04-18 19:04:40 +00001132; BTVER2-LABEL: test_ldmxcsr:
1133; BTVER2: # BB#0:
1134; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1135; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1136; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001137;
1138; ZNVER1-LABEL: test_ldmxcsr:
1139; ZNVER1: # BB#0:
1140; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001141; ZNVER1-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
1142; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the intrinsic takes an i8*, so the i32 stack slot is bitcast
; before the store and the call.
Simon Pilgrim93986492017-04-18 19:04:40 +00001143 %1 = alloca i32, align 4
1144 %2 = bitcast i32* %1 to i8*
1145 store i32 %a0, i32* %1
1146 call void @llvm.x86.sse.ldmxcsr(i8* %2)
1147 ret void
1148}
; Declaration of the intrinsic called by test_ldmxcsr above.
; NOTE(review): this declaration is marked `readnone`, yet the expected
; assembly above has the instruction reading the slot written by the
; preceding store. Confirm whether the attribute is intended, or whether it
; is simply ignored for intrinsic declarations.
1149declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone
1150
; test_maxps: packed <4 x float> maximum through the @llvm.x86.sse.max.ps
; intrinsic. It is first applied to the register operands %a0 and %a1, then
; to that result and a vector loaded through %a2, so both the register and
; the memory form of maxps / vmaxps appear.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file). For each -mcpu run line they pin the selected instructions and
; the `# sched: [...]` annotation printed under -print-schedule.
; NOTE(review): for the HASWELL and SKYLAKE prefixes the memory-operand form
; has the same first sched field as the register form, unlike most other
; CPUs. This is recorded model output; confirm it is intended.
1151define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1152; GENERIC-LABEL: test_maxps:
1153; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001154; GENERIC-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
1155; GENERIC-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
1156; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001157;
1158; ATOM-LABEL: test_maxps:
1159; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001160; ATOM-NEXT: maxps %xmm1, %xmm0 # sched: [5:5.00]
1161; ATOM-NEXT: maxps (%rdi), %xmm0 # sched: [5:5.00]
1162; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001163;
1164; SLM-LABEL: test_maxps:
1165; SLM: # BB#0:
1166; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
1167; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00]
1168; SLM-NEXT: retq # sched: [4:1.00]
1169;
1170; SANDY-LABEL: test_maxps:
1171; SANDY: # BB#0:
1172; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001173; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1174; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001175;
1176; HASWELL-LABEL: test_maxps:
1177; HASWELL: # BB#0:
1178; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001179; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1180; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001181;
Gadi Haber767d98b2017-08-30 08:08:50 +00001182; SKYLAKE-LABEL: test_maxps:
1183; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001184; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1185; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001186; SKYLAKE-NEXT: retq # sched: [2:1.00]
1187;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001188; SKX-LABEL: test_maxps:
1189; SKX: # BB#0:
1190; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1191; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001192; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001193;
Simon Pilgrim93986492017-04-18 19:04:40 +00001194; BTVER2-LABEL: test_maxps:
1195; BTVER2: # BB#0:
1196; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1197; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1198; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001199;
1200; ZNVER1-LABEL: test_maxps:
1201; ZNVER1: # BB#0:
1202; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1203; ZNVER1-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001204; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the second intrinsic call consumes the first call's result
; together with the loaded vector (load declared 16-byte aligned).
Simon Pilgrim93986492017-04-18 19:04:40 +00001205 %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
1206 %2 = load <4 x float>, <4 x float> *%a2, align 16
1207 %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2)
1208 ret <4 x float> %3
1209}
; Declaration of the intrinsic called twice by test_maxps above.
1210declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
1211
; test_maxss: applies @llvm.x86.sse.max.ss to the two vector arguments and then
; again to that result and a 16-byte-aligned <4 x float> loaded from %a2. Each
; prefix below therefore expects one register-operand and one memory-operand
; (v)maxss, each followed by the "# sched" annotation added by -print-schedule.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1212define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1213; GENERIC-LABEL: test_maxss:
1214; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001215; GENERIC-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
1216; GENERIC-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
1217; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001218;
1219; ATOM-LABEL: test_maxss:
1220; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001221; ATOM-NEXT: maxss %xmm1, %xmm0 # sched: [5:5.00]
1222; ATOM-NEXT: maxss (%rdi), %xmm0 # sched: [5:5.00]
1223; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001224;
1225; SLM-LABEL: test_maxss:
1226; SLM: # BB#0:
1227; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
1228; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00]
1229; SLM-NEXT: retq # sched: [4:1.00]
1230;
1231; SANDY-LABEL: test_maxss:
1232; SANDY: # BB#0:
1233; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001234; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1235; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001236;
1237; HASWELL-LABEL: test_maxss:
1238; HASWELL: # BB#0:
1239; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001240; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1241; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001242;
Gadi Haber767d98b2017-08-30 08:08:50 +00001243; SKYLAKE-LABEL: test_maxss:
1244; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001245; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1246; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001247; SKYLAKE-NEXT: retq # sched: [2:1.00]
1248;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001249; SKX-LABEL: test_maxss:
1250; SKX: # BB#0:
1251; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00001252; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1253; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001254;
Simon Pilgrim93986492017-04-18 19:04:40 +00001255; BTVER2-LABEL: test_maxss:
1256; BTVER2: # BB#0:
1257; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1258; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1259; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001260;
1261; ZNVER1-LABEL: test_maxss:
1262; ZNVER1: # BB#0:
1263; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1264; ZNVER1-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001265; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call takes both operands from the arguments; the second call takes
; its right-hand operand from the vector loaded through %a2.
Simon Pilgrim93986492017-04-18 19:04:40 +00001266 %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
1267 %2 = load <4 x float>, <4 x float> *%a2, align 16
1268 %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
1269 ret <4 x float> %3
1270}
; Declaration of the intrinsic called by test_maxss.
1271declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
1272
; test_minps: applies @llvm.x86.sse.min.ps to the two vector arguments and then
; again to that result and a 16-byte-aligned <4 x float> loaded from %a2. Each
; prefix below therefore expects one register-operand and one memory-operand
; (v)minps, each followed by the "# sched" annotation added by -print-schedule.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1273define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1274; GENERIC-LABEL: test_minps:
1275; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001276; GENERIC-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
1277; GENERIC-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
1278; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001279;
1280; ATOM-LABEL: test_minps:
1281; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001282; ATOM-NEXT: minps %xmm1, %xmm0 # sched: [5:5.00]
1283; ATOM-NEXT: minps (%rdi), %xmm0 # sched: [5:5.00]
1284; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001285;
1286; SLM-LABEL: test_minps:
1287; SLM: # BB#0:
1288; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
1289; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00]
1290; SLM-NEXT: retq # sched: [4:1.00]
1291;
1292; SANDY-LABEL: test_minps:
1293; SANDY: # BB#0:
1294; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001295; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1296; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001297;
1298; HASWELL-LABEL: test_minps:
1299; HASWELL: # BB#0:
1300; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001301; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1302; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001303;
Gadi Haber767d98b2017-08-30 08:08:50 +00001304; SKYLAKE-LABEL: test_minps:
1305; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001306; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1307; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001308; SKYLAKE-NEXT: retq # sched: [2:1.00]
1309;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001310; SKX-LABEL: test_minps:
1311; SKX: # BB#0:
1312; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1313; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001314; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001315;
Simon Pilgrim93986492017-04-18 19:04:40 +00001316; BTVER2-LABEL: test_minps:
1317; BTVER2: # BB#0:
1318; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1319; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1320; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001321;
1322; ZNVER1-LABEL: test_minps:
1323; ZNVER1: # BB#0:
1324; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1325; ZNVER1-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001326; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call takes both operands from the arguments; the second call takes
; its right-hand operand from the vector loaded through %a2.
Simon Pilgrim93986492017-04-18 19:04:40 +00001327 %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1328 %2 = load <4 x float>, <4 x float> *%a2, align 16
1329 %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2)
1330 ret <4 x float> %3
1331}
; Declaration of the intrinsic called by test_minps.
1332declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
1333
; test_minss: applies @llvm.x86.sse.min.ss to the two vector arguments and then
; again to that result and a 16-byte-aligned <4 x float> loaded from %a2. Each
; prefix below therefore expects one register-operand and one memory-operand
; (v)minss, each followed by the "# sched" annotation added by -print-schedule.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1334define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1335; GENERIC-LABEL: test_minss:
1336; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001337; GENERIC-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
1338; GENERIC-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
1339; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001340;
1341; ATOM-LABEL: test_minss:
1342; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001343; ATOM-NEXT: minss %xmm1, %xmm0 # sched: [5:5.00]
1344; ATOM-NEXT: minss (%rdi), %xmm0 # sched: [5:5.00]
1345; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001346;
1347; SLM-LABEL: test_minss:
1348; SLM: # BB#0:
1349; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
1350; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00]
1351; SLM-NEXT: retq # sched: [4:1.00]
1352;
1353; SANDY-LABEL: test_minss:
1354; SANDY: # BB#0:
1355; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001356; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1357; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001358;
1359; HASWELL-LABEL: test_minss:
1360; HASWELL: # BB#0:
1361; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001362; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1363; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001364;
Gadi Haber767d98b2017-08-30 08:08:50 +00001365; SKYLAKE-LABEL: test_minss:
1366; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001367; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1368; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001369; SKYLAKE-NEXT: retq # sched: [2:1.00]
1370;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001371; SKX-LABEL: test_minss:
1372; SKX: # BB#0:
1373; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00001374; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1375; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001376;
Simon Pilgrim93986492017-04-18 19:04:40 +00001377; BTVER2-LABEL: test_minss:
1378; BTVER2: # BB#0:
1379; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1380; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1381; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001382;
1383; ZNVER1-LABEL: test_minss:
1384; ZNVER1: # BB#0:
1385; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1386; ZNVER1-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001387; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call takes both operands from the arguments; the second call takes
; its right-hand operand from the vector loaded through %a2.
Simon Pilgrim93986492017-04-18 19:04:40 +00001388 %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
1389 %2 = load <4 x float>, <4 x float> *%a2, align 16
1390 %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
1391 ret <4 x float> %3
1392}
; Declaration of the intrinsic called by test_minss.
1393declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
1394
; test_movaps: loads a 16-byte-aligned <4 x float> from %a0, adds it to itself
; and stores the sum 16-byte-aligned to %a1. Each prefix below expects a
; (v)movaps load, a (v)addps and a (v)movaps store, each followed by the
; "# sched" annotation added by -print-schedule.
; NOTE(review): the fadd presumably exists to keep the load and the store as
; separate float-domain instructions - confirm against the test's history.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1395define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
1396; GENERIC-LABEL: test_movaps:
1397; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001398; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
1399; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1400; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00]
1401; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001402;
1403; ATOM-LABEL: test_movaps:
1404; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001405; ATOM-NEXT: movaps (%rdi), %xmm0 # sched: [1:1.00]
1406; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
1407; ATOM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1408; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001409;
1410; SLM-LABEL: test_movaps:
1411; SLM: # BB#0:
1412; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00]
1413; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1414; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1415; SLM-NEXT: retq # sched: [4:1.00]
1416;
1417; SANDY-LABEL: test_movaps:
1418; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001419; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001420; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001421; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00]
1422; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001423;
1424; HASWELL-LABEL: test_movaps:
1425; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001426; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001427; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001428; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001429; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001430;
Gadi Haber767d98b2017-08-30 08:08:50 +00001431; SKYLAKE-LABEL: test_movaps:
1432; SKYLAKE: # BB#0:
1433; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001434; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001435; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1436; SKYLAKE-NEXT: retq # sched: [2:1.00]
1437;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001438; SKX-LABEL: test_movaps:
1439; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001440; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
1441; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001442; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001443; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001444;
Simon Pilgrim93986492017-04-18 19:04:40 +00001445; BTVER2-LABEL: test_movaps:
1446; BTVER2: # BB#0:
1447; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
1448; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1449; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1450; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001451;
1452; ZNVER1-LABEL: test_movaps:
1453; ZNVER1: # BB#0:
1454; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50]
1455; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1456; ZNVER1-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001457; ZNVER1-NEXT: retq # sched: [1:0.50]
; Aligned load from %a0, self-add, aligned store to %a1.
Simon Pilgrim93986492017-04-18 19:04:40 +00001458 %1 = load <4 x float>, <4 x float> *%a0, align 16
1459 %2 = fadd <4 x float> %1, %1
1460 store <4 x float> %2, <4 x float> *%a1, align 16
1461 ret void
1462}
1463
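1464; TODO: (v)movhlps - the SANDY, HASWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 runs below currently select vunpckhpd, so vmovhlps itself is not covered yet.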
1465
; test_movhlps: a single shufflevector with mask <6, 7, 2, 3>, i.e. elements
; 2-3 of %a1 followed by elements 2-3 of %a0. The GENERIC, ATOM and SLM
; prefixes expect movhlps; the remaining prefixes expect vunpckhpd for the
; same shuffle. The ATOM expectations also contain six nop lines between the
; shuffle and the return. Each instruction line carries the "sched" annotation
; added by -print-schedule.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1466define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
1467; GENERIC-LABEL: test_movhlps:
1468; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001469; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1470; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001471;
1472; ATOM-LABEL: test_movhlps:
1473; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001474; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1475; ATOM-NEXT: nop # sched: [1:0.50]
1476; ATOM-NEXT: nop # sched: [1:0.50]
1477; ATOM-NEXT: nop # sched: [1:0.50]
1478; ATOM-NEXT: nop # sched: [1:0.50]
1479; ATOM-NEXT: nop # sched: [1:0.50]
1480; ATOM-NEXT: nop # sched: [1:0.50]
1481; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001482;
1483; SLM-LABEL: test_movhlps:
1484; SLM: # BB#0:
1485; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1486; SLM-NEXT: retq # sched: [4:1.00]
1487;
1488; SANDY-LABEL: test_movhlps:
1489; SANDY: # BB#0:
1490; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001491; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001492;
1493; HASWELL-LABEL: test_movhlps:
1494; HASWELL: # BB#0:
1495; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001496; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001497;
Gadi Haber767d98b2017-08-30 08:08:50 +00001498; SKYLAKE-LABEL: test_movhlps:
1499; SKYLAKE: # BB#0:
1500; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1501; SKYLAKE-NEXT: retq # sched: [2:1.00]
1502;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001503; SKX-LABEL: test_movhlps:
1504; SKX: # BB#0:
1505; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001506; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001507;
Simon Pilgrim93986492017-04-18 19:04:40 +00001508; BTVER2-LABEL: test_movhlps:
1509; BTVER2: # BB#0:
1510; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
1511; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001512;
1513; ZNVER1-LABEL: test_movhlps:
1514; ZNVER1: # BB#0:
1515; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001516; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 6-7 select elements 2-3 of %a1; indices 2-3 select elements
; 2-3 of %a0.
Simon Pilgrim93986492017-04-18 19:04:40 +00001517 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1518 ret <4 x float> %1
1519}
1520
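1521; TODO: (v)movhps - every run below currently selects (v)movhpd for the load and movhlps+movlps or (v)pextrq for the store, so (v)movhps itself is not covered yet.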
1522
; test_movhps: reinterprets %a2 as a <2 x float> pointer, loads two floats
; (align 8), places them in elements 2-3 of %a1, adds %a0, then stores elements
; 2-3 of the sum back through the same pointer. Every prefix below expects the
; load as (v)movhpd. The store is expected as movhlps followed by movlps for
; GENERIC and ATOM, and as (v)pextrq $1 for the remaining prefixes. Each
; instruction line carries the "sched" annotation added by -print-schedule.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1523define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1524; GENERIC-LABEL: test_movhps:
1525; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001526; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
1527; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1528; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
1529; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
1530; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001531;
1532; ATOM-LABEL: test_movhps:
1533; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001534; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
1535; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1536; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
1537; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1538; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001539;
1540; SLM-LABEL: test_movhps:
1541; SLM: # BB#0:
1542; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
1543; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1544; SLM-NEXT: pextrq $1, %xmm1, (%rdi) # sched: [4:2.00]
1545; SLM-NEXT: retq # sched: [4:1.00]
1546;
1547; SANDY-LABEL: test_movhps:
1548; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001549; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001550; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1551; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001552; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001553;
1554; HASWELL-LABEL: test_movhps:
1555; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001556; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001557; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001558; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
1559; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001560;
Gadi Haber767d98b2017-08-30 08:08:50 +00001561; SKYLAKE-LABEL: test_movhps:
1562; SKYLAKE: # BB#0:
1563; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001564; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001565; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
1566; SKYLAKE-NEXT: retq # sched: [2:1.00]
1567;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001568; SKX-LABEL: test_movhps:
1569; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001570; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1571; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1572; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1573; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001574;
Simon Pilgrim93986492017-04-18 19:04:40 +00001575; BTVER2-LABEL: test_movhps:
1576; BTVER2: # BB#0:
1577; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1578; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1579; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
1580; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001581;
1582; ZNVER1-LABEL: test_movhps:
1583; ZNVER1: # BB#0:
1584; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
1585; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1586; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [8:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001587; ZNVER1-NEXT: retq # sched: [1:0.50]
; %3 widens the two loaded floats to four elements (elements 2-3 come from the
; undef operand); %4 keeps elements 0-1 of %a1 and takes elements 0-1 of %3 as
; its upper half; %6 extracts elements 2-3 of the sum for the final store.
Simon Pilgrim93986492017-04-18 19:04:40 +00001588 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1589 %2 = load <2 x float>, <2 x float> *%1, align 8
1590 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1591 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1592 %5 = fadd <4 x float> %a0, %4
1593 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1594 store <2 x float> %6, <2 x float>* %1
1595 ret void
1596}
1597
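1598; TODO: (v)movlhps - NOTE(review): every run below already checks (v)movlhps, so this TODO looks stale; confirm before removing it.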
1599
; test_movlhps: shuffles elements 0-1 of %a0 together with elements 0-1 of %a1
; (mask <0, 1, 4, 5>) and then adds %a1 to the shuffled vector. Every prefix
; below expects a (v)movlhps followed by a (v)addps, each carrying the "sched"
; annotation added by -print-schedule.
; NOTE(review): the fadd presumably keeps the shuffle in the float domain -
; confirm against the test's history.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1600define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
1601; GENERIC-LABEL: test_movlhps:
1602; GENERIC: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001603; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim84846982017-08-01 15:14:35 +00001604; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1605; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001606;
1607; ATOM-LABEL: test_movlhps:
1608; ATOM: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001609; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001610; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
1611; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001612;
1613; SLM-LABEL: test_movlhps:
1614; SLM: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001615; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001616; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1617; SLM-NEXT: retq # sched: [4:1.00]
1618;
1619; SANDY-LABEL: test_movlhps:
1620; SANDY: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001621; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001622; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001623; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001624;
1625; HASWELL-LABEL: test_movlhps:
1626; HASWELL: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001627; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001628; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001629; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001630;
Gadi Haber767d98b2017-08-30 08:08:50 +00001631; SKYLAKE-LABEL: test_movlhps:
1632; SKYLAKE: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001633; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001634; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001635; SKYLAKE-NEXT: retq # sched: [2:1.00]
1636;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001637; SKX-LABEL: test_movlhps:
1638; SKX: # BB#0:
1639; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001640; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
1641; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001642;
Simon Pilgrim93986492017-04-18 19:04:40 +00001643; BTVER2-LABEL: test_movlhps:
1644; BTVER2: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001645; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001646; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1647; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001648;
1649; ZNVER1-LABEL: test_movlhps:
1650; ZNVER1: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001651; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
Craig Topper106b5b62017-07-19 02:45:14 +00001652; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001653; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 0-1 select elements 0-1 of %a0; indices 4-5 select elements
; 0-1 of %a1.
Simon Pilgrim93986492017-04-18 19:04:40 +00001654 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1655 %2 = fadd <4 x float> %a1, %1
1656 ret <4 x float> %2
1657}
1658
; test_movlps: reinterprets %a2 as a <2 x float> pointer, loads two floats
; (align 8), places them in elements 0-1 of %a1, adds %a0, then stores elements
; 0-1 of the sum back through the same pointer. Every prefix below expects the
; load as (v)movlpd, followed by (v)addps and a (v)movlps store, each carrying
; the "sched" annotation added by -print-schedule.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1659define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1660; GENERIC-LABEL: test_movlps:
1661; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001662; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
1663; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1664; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
1665; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001666;
1667; ATOM-LABEL: test_movlps:
1668; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001669; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
1670; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1671; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1672; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001673;
1674; SLM-LABEL: test_movlps:
1675; SLM: # BB#0:
1676; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
1677; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1678; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1679; SLM-NEXT: retq # sched: [4:1.00]
1680;
1681; SANDY-LABEL: test_movlps:
1682; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001683; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001684; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001685; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00]
1686; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001687;
1688; HASWELL-LABEL: test_movlps:
1689; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001690; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001691; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001692; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001693; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001694;
Gadi Haber767d98b2017-08-30 08:08:50 +00001695; SKYLAKE-LABEL: test_movlps:
1696; SKYLAKE: # BB#0:
1697; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001698; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001699; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1700; SKYLAKE-NEXT: retq # sched: [2:1.00]
1701;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001702; SKX-LABEL: test_movlps:
1703; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001704; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1705; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001706; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001707; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001708;
Simon Pilgrim93986492017-04-18 19:04:40 +00001709; BTVER2-LABEL: test_movlps:
1710; BTVER2: # BB#0:
1711; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1712; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1713; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1714; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001715;
1716; ZNVER1-LABEL: test_movlps:
1717; ZNVER1: # BB#0:
1718; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
1719; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1720; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001721; ZNVER1-NEXT: retq # sched: [1:0.50]
; %3 widens the two loaded floats to four elements (elements 2-3 come from the
; undef operand); %4 takes elements 0-1 of %3 as its lower half and keeps
; elements 2-3 of %a1; %6 extracts elements 0-1 of the sum for the final store.
Simon Pilgrim93986492017-04-18 19:04:40 +00001722 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1723 %2 = load <2 x float>, <2 x float> *%1, align 8
1724 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1725 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1726 %5 = fadd <4 x float> %a0, %4
1727 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1728 store <2 x float> %6, <2 x float>* %1
1729 ret void
1730}
1731
; test_movmskps: returns the i32 produced by @llvm.x86.sse.movmsk.ps for the
; vector argument. Every prefix below expects a single (v)movmskps into %eax
; followed by the return, each carrying the "# sched" annotation added by
; -print-schedule. The ATOM expectations also contain two nop lines before the
; return.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1732define i32 @test_movmskps(<4 x float> %a0) {
1733; GENERIC-LABEL: test_movmskps:
1734; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001735; GENERIC-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
1736; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001737;
1738; ATOM-LABEL: test_movmskps:
1739; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001740; ATOM-NEXT: movmskps %xmm0, %eax # sched: [3:3.00]
1741; ATOM-NEXT: nop # sched: [1:0.50]
1742; ATOM-NEXT: nop # sched: [1:0.50]
1743; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001744;
1745; SLM-LABEL: test_movmskps:
1746; SLM: # BB#0:
1747; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50]
1748; SLM-NEXT: retq # sched: [4:1.00]
1749;
1750; SANDY-LABEL: test_movmskps:
1751; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001752; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
1753; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001754;
1755; HASWELL-LABEL: test_movmskps:
1756; HASWELL: # BB#0:
1757; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001758; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001759;
Gadi Haber767d98b2017-08-30 08:08:50 +00001760; SKYLAKE-LABEL: test_movmskps:
1761; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001762; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001763; SKYLAKE-NEXT: retq # sched: [2:1.00]
1764;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001765; SKX-LABEL: test_movmskps:
1766; SKX: # BB#0:
1767; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001768; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001769;
Simon Pilgrim93986492017-04-18 19:04:40 +00001770; BTVER2-LABEL: test_movmskps:
1771; BTVER2: # BB#0:
1772; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.50]
1773; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001774;
1775; ZNVER1-LABEL: test_movmskps:
1776; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001777; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:1.00]
1778; ZNVER1-NEXT: retq # sched: [1:0.50]
; The intrinsic result is returned directly.
Simon Pilgrim93986492017-04-18 19:04:40 +00001779 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
1780 ret i32 %1
1781}
; Declaration of the intrinsic called by test_movmskps.
1782declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
1783
; test_movntps: stores the vector argument 16-byte-aligned to %a1 with
; !nontemporal metadata attached. Every prefix below expects a single
; (v)movntps store followed by the return, each carrying the "# sched"
; annotation added by -print-schedule. The ATOM expectations also contain six
; nop lines before the return.
; The assertions are autogenerated by utils/update_llc_test_checks.py; prefer
; regenerating them over editing them by hand.
1784define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
1785; GENERIC-LABEL: test_movntps:
1786; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001787; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00]
1788; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001789;
1790; ATOM-LABEL: test_movntps:
1791; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001792; ATOM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
1793; ATOM-NEXT: nop # sched: [1:0.50]
1794; ATOM-NEXT: nop # sched: [1:0.50]
1795; ATOM-NEXT: nop # sched: [1:0.50]
1796; ATOM-NEXT: nop # sched: [1:0.50]
1797; ATOM-NEXT: nop # sched: [1:0.50]
1798; ATOM-NEXT: nop # sched: [1:0.50]
1799; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001800;
1801; SLM-LABEL: test_movntps:
1802; SLM: # BB#0:
1803; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
1804; SLM-NEXT: retq # sched: [4:1.00]
1805;
1806; SANDY-LABEL: test_movntps:
1807; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001808; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00]
1809; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001810;
1811; HASWELL-LABEL: test_movntps:
1812; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00001813; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001814; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001815;
Gadi Haber767d98b2017-08-30 08:08:50 +00001816; SKYLAKE-LABEL: test_movntps:
1817; SKYLAKE: # BB#0:
1818; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1819; SKYLAKE-NEXT: retq # sched: [2:1.00]
1820;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001821; SKX-LABEL: test_movntps:
1822; SKX: # BB#0:
1823; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001824; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001825;
Simon Pilgrim93986492017-04-18 19:04:40 +00001826; BTVER2-LABEL: test_movntps:
1827; BTVER2: # BB#0:
1828; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1829; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001830;
1831; ZNVER1-LABEL: test_movntps:
1832; ZNVER1: # BB#0:
1833; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001834; ZNVER1-NEXT: retq # sched: [1:0.50]
; The !nontemporal operand refers to metadata node !0, which is defined
; outside this function.
Simon Pilgrim93986492017-04-18 19:04:40 +00001835 store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0
1836 ret void
1837}
1838
; test_movss_mem: loads one float from %a0, adds it to itself and stores the
; result to %a1 (both accesses are align 1). Every run configuration below
; expects a movss/vmovss load, an addss/vaddss, and a movss/vmovss store to
; (%rsi). The "# sched" suffixes come from -print-schedule on the RUN lines.
1839define void @test_movss_mem(float* %a0, float* %a1) {
1840; GENERIC-LABEL: test_movss_mem:
1841; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001842; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
1843; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
1844; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00]
1845; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001846;
1847; ATOM-LABEL: test_movss_mem:
1848; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001849; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00]
1850; ATOM-NEXT: addss %xmm0, %xmm0 # sched: [5:5.00]
1851; ATOM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
1852; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001853;
1854; SLM-LABEL: test_movss_mem:
1855; SLM: # BB#0:
1856; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
1857; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
1858; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
1859; SLM-NEXT: retq # sched: [4:1.00]
1860;
1861; SANDY-LABEL: test_movss_mem:
1862; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001863; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001864; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001865; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00]
1866; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001867;
1868; HASWELL-LABEL: test_movss_mem:
1869; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001870; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001871; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001872; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001873; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001874;
Gadi Haber767d98b2017-08-30 08:08:50 +00001875; SKYLAKE-LABEL: test_movss_mem:
1876; SKYLAKE: # BB#0:
1877; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001878; SKYLAKE-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001879; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
1880; SKYLAKE-NEXT: retq # sched: [2:1.00]
1881;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001882; SKX-LABEL: test_movss_mem:
1883; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001884; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
1885; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001886; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001887; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001888;
Simon Pilgrim93986492017-04-18 19:04:40 +00001889; BTVER2-LABEL: test_movss_mem:
1890; BTVER2: # BB#0:
1891; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
1892; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1893; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
1894; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001895;
1896; ZNVER1-LABEL: test_movss_mem:
1897; ZNVER1: # BB#0:
1898; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
1899; ZNVER1-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1900; ZNVER1-NEXT: vmovss %xmm0, (%rsi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001901; ZNVER1-NEXT: retq # sched: [1:0.50]
; The fadd sits between the load and the store; it is the source of the
; addss/vaddss line in each expectation block above.
Simon Pilgrim93986492017-04-18 19:04:40 +00001902 %1 = load float, float* %a0, align 1
1903 %2 = fadd float %1, %1
1904 store float %2, float *%a1, align 1
1905 ret void
1906}
1907
; test_movss_reg: register-to-register low-element merge. The shuffle mask
; <4, 1, 2, 3> takes element 0 from %a1 and elements 1-3 from %a0. The
; expected instruction differs per run configuration: movss for the generic
; and atom runs, vmovss for skx, blendps for slm, and vblendps for the
; remaining AVX-capable runs. The "# sched" suffixes come from -print-schedule
; on the RUN lines.
1908define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
1909; GENERIC-LABEL: test_movss_reg:
1910; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001911; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
1912; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001913;
1914; ATOM-LABEL: test_movss_reg:
1915; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001916; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
1917; ATOM-NEXT: nop # sched: [1:0.50]
1918; ATOM-NEXT: nop # sched: [1:0.50]
1919; ATOM-NEXT: nop # sched: [1:0.50]
1920; ATOM-NEXT: nop # sched: [1:0.50]
1921; ATOM-NEXT: nop # sched: [1:0.50]
1922; ATOM-NEXT: nop # sched: [1:0.50]
1923; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001924;
1925; SLM-LABEL: test_movss_reg:
1926; SLM: # BB#0:
1927; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
1928; SLM-NEXT: retq # sched: [4:1.00]
1929;
1930; SANDY-LABEL: test_movss_reg:
1931; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00001932; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001933; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001934;
1935; HASWELL-LABEL: test_movss_reg:
1936; HASWELL: # BB#0:
1937; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001938; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001939;
Gadi Haber767d98b2017-08-30 08:08:50 +00001940; SKYLAKE-LABEL: test_movss_reg:
1941; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001942; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001943; SKYLAKE-NEXT: retq # sched: [2:1.00]
1944;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001945; SKX-LABEL: test_movss_reg:
1946; SKX: # BB#0:
1947; SKX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001948; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001949;
Simon Pilgrim93986492017-04-18 19:04:40 +00001950; BTVER2-LABEL: test_movss_reg:
1951; BTVER2: # BB#0:
1952; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
1953; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001954;
1955; ZNVER1-LABEL: test_movss_reg:
1956; ZNVER1: # BB#0:
1957; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001958; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask index 4 selects element 0 of the second operand (%a1); indices 1-3
; select the matching elements of the first operand (%a0).
Simon Pilgrim93986492017-04-18 19:04:40 +00001959 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1960 ret <4 x float> %1
1961}
1962
; test_movups: loads a <4 x float> from %a0 with align 1, adds it to itself
; and stores the result to %a1 with align 1. Every run configuration below
; expects a movups/vmovups load from (%rdi), an addps/vaddps, and a
; movups/vmovups store to (%rsi). The "# sched" suffixes come from
; -print-schedule on the RUN lines.
1963define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
1964; GENERIC-LABEL: test_movups:
1965; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001966; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
1967; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1968; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00]
1969; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001970;
1971; ATOM-LABEL: test_movups:
1972; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001973; ATOM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.50]
1974; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
1975; ATOM-NEXT: movups %xmm0, (%rsi) # sched: [2:1.00]
1976; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001977;
1978; SLM-LABEL: test_movups:
1979; SLM: # BB#0:
1980; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00]
1981; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1982; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
1983; SLM-NEXT: retq # sched: [4:1.00]
1984;
1985; SANDY-LABEL: test_movups:
1986; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001987; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001988; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001989; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
1990; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001991;
1992; HASWELL-LABEL: test_movups:
1993; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001994; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001995; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001996; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001997; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001998;
Gadi Haber767d98b2017-08-30 08:08:50 +00001999; SKYLAKE-LABEL: test_movups:
2000; SKYLAKE: # BB#0:
2001; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002002; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002003; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
2004; SKYLAKE-NEXT: retq # sched: [2:1.00]
2005;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002006; SKX-LABEL: test_movups:
2007; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002008; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
2009; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002010; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002011; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002012;
Simon Pilgrim93986492017-04-18 19:04:40 +00002013; BTVER2-LABEL: test_movups:
2014; BTVER2: # BB#0:
2015; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
2016; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2017; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
2018; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002019;
2020; ZNVER1-LABEL: test_movups:
2021; ZNVER1: # BB#0:
2022; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50]
2023; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2024; ZNVER1-NEXT: vmovups %xmm0, (%rsi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002025; ZNVER1-NEXT: retq # sched: [1:0.50]
; Both memory accesses use align 1; the expectations above are all the
; unaligned movups/vmovups forms.
Simon Pilgrim93986492017-04-18 19:04:40 +00002026 %1 = load <4 x float>, <4 x float> *%a0, align 1
2027 %2 = fadd <4 x float> %1, %1
2028 store <4 x float> %2, <4 x float> *%a1, align 1
2029 ret void
2030}
2031
; test_mulps: multiplies %a0 by %a1, then multiplies that product by a
; <4 x float> loaded (align 16) from %a2. Each run configuration below
; expects two mulps/vmulps instructions: the register-register form first,
; then the form taking (%rdi) as a memory operand. The "# sched" suffixes come
; from -print-schedule on the RUN lines.
2032define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2033; GENERIC-LABEL: test_mulps:
2034; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002035; GENERIC-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
2036; GENERIC-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
2037; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002038;
2039; ATOM-LABEL: test_mulps:
2040; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002041; ATOM-NEXT: mulps %xmm1, %xmm0 # sched: [5:5.00]
2042; ATOM-NEXT: mulps (%rdi), %xmm0 # sched: [10:5.00]
2043; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002044;
2045; SLM-LABEL: test_mulps:
2046; SLM: # BB#0:
2047; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00]
2048; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00]
2049; SLM-NEXT: retq # sched: [4:1.00]
2050;
2051; SANDY-LABEL: test_mulps:
2052; SANDY: # BB#0:
2053; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002054; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
2055; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002056;
2057; HASWELL-LABEL: test_mulps:
2058; HASWELL: # BB#0:
2059; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002060; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
2061; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002062;
Gadi Haber767d98b2017-08-30 08:08:50 +00002063; SKYLAKE-LABEL: test_mulps:
2064; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002065; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
2066; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002067; SKYLAKE-NEXT: retq # sched: [2:1.00]
2068;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002069; SKX-LABEL: test_mulps:
2070; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002071; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2072; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2073; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002074;
Simon Pilgrim93986492017-04-18 19:04:40 +00002075; BTVER2-LABEL: test_mulps:
2076; BTVER2: # BB#0:
2077; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
2078; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2079; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002080;
2081; ZNVER1-LABEL: test_mulps:
2082; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002083; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2084; ZNVER1-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2085; ZNVER1-NEXT: retq # sched: [1:0.50]
; First fmul is register-register; the second consumes the align-16 load, which
; the expectations above show folded into the multiply as a (%rdi) operand.
Simon Pilgrim93986492017-04-18 19:04:40 +00002086 %1 = fmul <4 x float> %a0, %a1
2087 %2 = load <4 x float>, <4 x float> *%a2, align 16
2088 %3 = fmul <4 x float> %1, %2
2089 ret <4 x float> %3
2090}
2091
; test_mulss: scalar counterpart of the packed multiply test. Multiplies %a0
; by %a1, then multiplies that product by a float loaded (align 4) from %a2.
; Each run configuration below expects two mulss/vmulss instructions: the
; register-register form first, then the form taking (%rdi) as a memory
; operand. The "# sched" suffixes come from -print-schedule on the RUN lines.
2092define float @test_mulss(float %a0, float %a1, float *%a2) {
2093; GENERIC-LABEL: test_mulss:
2094; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002095; GENERIC-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
2096; GENERIC-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
2097; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002098;
2099; ATOM-LABEL: test_mulss:
2100; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002101; ATOM-NEXT: mulss %xmm1, %xmm0 # sched: [4:4.00]
2102; ATOM-NEXT: mulss (%rdi), %xmm0 # sched: [5:5.00]
2103; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002104;
2105; SLM-LABEL: test_mulss:
2106; SLM: # BB#0:
2107; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00]
2108; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00]
2109; SLM-NEXT: retq # sched: [4:1.00]
2110;
2111; SANDY-LABEL: test_mulss:
2112; SANDY: # BB#0:
2113; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002114; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
2115; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002116;
2117; HASWELL-LABEL: test_mulss:
2118; HASWELL: # BB#0:
2119; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002120; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
2121; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002122;
Gadi Haber767d98b2017-08-30 08:08:50 +00002123; SKYLAKE-LABEL: test_mulss:
2124; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002125; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
2126; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002127; SKYLAKE-NEXT: retq # sched: [2:1.00]
2128;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002129; SKX-LABEL: test_mulss:
2130; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002131; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2132; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
2133; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002134;
Simon Pilgrim93986492017-04-18 19:04:40 +00002135; BTVER2-LABEL: test_mulss:
2136; BTVER2: # BB#0:
2137; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
2138; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2139; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002140;
2141; ZNVER1-LABEL: test_mulss:
2142; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002143; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2144; ZNVER1-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2145; ZNVER1-NEXT: retq # sched: [1:0.50]
; First fmul is register-register; the second consumes the align-4 load, which
; the expectations above show folded into the multiply as a (%rdi) operand.
Simon Pilgrim93986492017-04-18 19:04:40 +00002146 %1 = fmul float %a0, %a1
2147 %2 = load float, float *%a2, align 4
2148 %3 = fmul float %1, %2
2149 ret float %3
2150}
2151
; test_orps: bitwise OR on float vectors. The inputs are bitcast to <4 x i32>,
; OR-ed together, OR-ed again with a vector loaded (align 16) from %a2, and
; bitcast back to <4 x float>. Each run configuration below expects two
; orps/vorps instructions (register form, then (%rdi) memory-operand form);
; the Atom output also has four nops ahead of retq. The "# sched" suffixes
; come from -print-schedule on the RUN lines.
2152define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2153; GENERIC-LABEL: test_orps:
2154; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002155; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
2156; GENERIC-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
2157; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002158;
2159; ATOM-LABEL: test_orps:
2160; ATOM: # BB#0:
Simon Pilgrim486072d2017-08-01 17:51:20 +00002161; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
2162; ATOM-NEXT: orps (%rdi), %xmm0 # sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002163; ATOM-NEXT: nop # sched: [1:0.50]
2164; ATOM-NEXT: nop # sched: [1:0.50]
2165; ATOM-NEXT: nop # sched: [1:0.50]
2166; ATOM-NEXT: nop # sched: [1:0.50]
2167; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002168;
2169; SLM-LABEL: test_orps:
2170; SLM: # BB#0:
2171; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
2172; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00]
2173; SLM-NEXT: retq # sched: [4:1.00]
2174;
2175; SANDY-LABEL: test_orps:
2176; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002177; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
2178; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2179; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002180;
2181; HASWELL-LABEL: test_orps:
2182; HASWELL: # BB#0:
2183; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002184; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
2185; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002186;
Gadi Haber767d98b2017-08-30 08:08:50 +00002187; SKYLAKE-LABEL: test_orps:
2188; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002189; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2190; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002191; SKYLAKE-NEXT: retq # sched: [2:1.00]
2192;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002193; SKX-LABEL: test_orps:
2194; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002195; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2196; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
2197; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002198;
Simon Pilgrim93986492017-04-18 19:04:40 +00002199; BTVER2-LABEL: test_orps:
2200; BTVER2: # BB#0:
2201; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2202; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
2203; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002204;
2205; ZNVER1-LABEL: test_orps:
2206; ZNVER1: # BB#0:
2207; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2208; ZNVER1-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002209; ZNVER1-NEXT: retq # sched: [1:0.50]
; The `or` is done on <4 x i32> values, so the float vectors are bitcast in and
; the result is bitcast back out.
Simon Pilgrim93986492017-04-18 19:04:40 +00002210 %1 = bitcast <4 x float> %a0 to <4 x i32>
2211 %2 = bitcast <4 x float> %a1 to <4 x i32>
2212 %3 = or <4 x i32> %1, %2
2213 %4 = load <4 x float>, <4 x float> *%a2, align 16
2214 %5 = bitcast <4 x float> %4 to <4 x i32>
2215 %6 = or <4 x i32> %3, %5
2216 %7 = bitcast <4 x i32> %6 to <4 x float>
2217 ret <4 x float> %7
2218}
2219
; test_prefetchnta: calls @llvm.prefetch on %a0 with the constant operands
; (0, 0, 1). Every run configuration below expects a single prefetchnta of
; (%rdi) followed by retq; the Atom output also has six nops ahead of retq.
; The "# sched" suffixes come from -print-schedule on the RUN lines.
2220define void @test_prefetchnta(i8* %a0) {
2221; GENERIC-LABEL: test_prefetchnta:
2222; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002223; GENERIC-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2224; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002225;
2226; ATOM-LABEL: test_prefetchnta:
2227; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002228; ATOM-NEXT: prefetchnta (%rdi) # sched: [1:1.00]
2229; ATOM-NEXT: nop # sched: [1:0.50]
2230; ATOM-NEXT: nop # sched: [1:0.50]
2231; ATOM-NEXT: nop # sched: [1:0.50]
2232; ATOM-NEXT: nop # sched: [1:0.50]
2233; ATOM-NEXT: nop # sched: [1:0.50]
2234; ATOM-NEXT: nop # sched: [1:0.50]
2235; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002236;
2237; SLM-LABEL: test_prefetchnta:
2238; SLM: # BB#0:
2239; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
2240; SLM-NEXT: retq # sched: [4:1.00]
2241;
2242; SANDY-LABEL: test_prefetchnta:
2243; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002244; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2245; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002246;
2247; HASWELL-LABEL: test_prefetchnta:
2248; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002249; HASWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
2250; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002251;
Gadi Haber767d98b2017-08-30 08:08:50 +00002252; SKYLAKE-LABEL: test_prefetchnta:
2253; SKYLAKE: # BB#0:
2254; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
2255; SKYLAKE-NEXT: retq # sched: [2:1.00]
2256;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002257; SKX-LABEL: test_prefetchnta:
2258; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002259; SKX-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2260; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002261;
Simon Pilgrim93986492017-04-18 19:04:40 +00002262; BTVER2-LABEL: test_prefetchnta:
2263; BTVER2: # BB#0:
2264; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00]
2265; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002266;
2267; ZNVER1-LABEL: test_prefetchnta:
2268; ZNVER1: # BB#0:
2269; ZNVER1-NEXT: prefetchnta (%rdi) # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002270; ZNVER1-NEXT: retq # sched: [1:0.50]
; NOTE(review): the three trailing i32 operands are presumably rw, locality
; and cache type as described in the LLVM LangRef, with this combination
; selecting the non-temporal hint - confirm against the LangRef.
Simon Pilgrim93986492017-04-18 19:04:40 +00002271 call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
2272 ret void
2273}
2274declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone
2275
; test_rcpps: applies @llvm.x86.sse.rcp.ps to %a0 and to a vector loaded
; (align 16) from %a1, then adds the two results. Each run configuration
; below expects one register-form and one (%rdi) memory-form reciprocal
; followed by an addps/vaddps. Which reciprocal comes first varies by
; configuration, the atom and slm outputs need a trailing movaps to place the
; result in xmm0, and the -mcpu=skx output uses vrcp14ps instead of vrcpps.
; The "# sched" suffixes come from -print-schedule on the RUN lines.
2276define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
2277; GENERIC-LABEL: test_rcpps:
2278; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002279; GENERIC-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
2280; GENERIC-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
2281; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2282; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002283;
2284; ATOM-LABEL: test_rcpps:
2285; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002286; ATOM-NEXT: rcpps (%rdi), %xmm1 # sched: [10:5.00]
2287; ATOM-NEXT: rcpps %xmm0, %xmm0 # sched: [9:4.50]
2288; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
2289; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
2290; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002291;
2292; SLM-LABEL: test_rcpps:
2293; SLM: # BB#0:
2294; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00]
2295; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00]
2296; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2297; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2298; SLM-NEXT: retq # sched: [4:1.00]
2299;
2300; SANDY-LABEL: test_rcpps:
2301; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00002302; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002303; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002304; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002305; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002306;
2307; HASWELL-LABEL: test_rcpps:
2308; HASWELL: # BB#0:
2309; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002310; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002311; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002312; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002313;
Gadi Haber767d98b2017-08-30 08:08:50 +00002314; SKYLAKE-LABEL: test_rcpps:
2315; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002316; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
2317; SKYLAKE-NEXT: vrcpps (%rdi), %xmm1 # sched: [4:1.00]
2318; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002319; SKYLAKE-NEXT: retq # sched: [2:1.00]
2320;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002321; SKX-LABEL: test_rcpps:
2322; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002323; SKX-NEXT: vrcp14ps %xmm0, %xmm0 # sched: [4:1.00]
2324; SKX-NEXT: vrcp14ps (%rdi), %xmm1 # sched: [10:1.00]
2325; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2326; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002327;
Simon Pilgrim93986492017-04-18 19:04:40 +00002328; BTVER2-LABEL: test_rcpps:
2329; BTVER2: # BB#0:
2330; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
2331; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00]
2332; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2333; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002334;
2335; ZNVER1-LABEL: test_rcpps:
2336; ZNVER1: # BB#0:
2337; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50]
2338; ZNVER1-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:0.50]
2339; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002340; ZNVER1-NEXT: retq # sched: [1:0.50]
; One intrinsic call takes the register argument and the other takes the
; loaded value; the fadd consumes both results.
Simon Pilgrim93986492017-04-18 19:04:40 +00002341 %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
2342 %2 = load <4 x float>, <4 x float> *%a1, align 16
2343 %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2)
2344 %4 = fadd <4 x float> %1, %3
2345 ret <4 x float> %4
2346}
2347declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
2348
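2349; TODO - rcpss_m (presumably the memory-operand form of rcpss; test_rcpss below only checks the register form, fed by a separate movss load)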
2350
; test_rcpss: applies @llvm.x86.sse.rcp.ss to %a0 and to a float loaded
; (align 4) from %a1, each inserted into element 0 of an undef vector, then
; adds the two vector results. Every run configuration below expects a
; separate movss/vmovss load followed by register-form rcpss/vrcpss
; instructions and an addps/vaddps; no expectation uses a memory-operand
; rcpss. The two Atom rcpss check lines carry no "# sched" annotation, unlike
; the other instruction lines. The "# sched" suffixes come from
; -print-schedule on the RUN lines.
2351define <4 x float> @test_rcpss(float %a0, float *%a1) {
2352; GENERIC-LABEL: test_rcpss:
2353; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002354; GENERIC-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
2355; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2356; GENERIC-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
2357; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2358; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002359;
2360; ATOM-LABEL: test_rcpss:
2361; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002362; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002363; ATOM-NEXT: rcpss %xmm0, %xmm0
2364; ATOM-NEXT: rcpss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002365; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2366; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002367;
2368; SLM-LABEL: test_rcpss:
2369; SLM: # BB#0:
2370; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
2371; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [8:1.00]
2372; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [8:1.00]
2373; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2374; SLM-NEXT: retq # sched: [4:1.00]
2375;
2376; SANDY-LABEL: test_rcpss:
2377; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00002378; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002379; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
Gadi Haberbed2c502017-08-13 13:59:24 +00002380; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002381; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002382; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002383;
2384; HASWELL-LABEL: test_rcpss:
2385; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002386; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2387; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
2388; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002389; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002390; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002391;
Gadi Haber767d98b2017-08-30 08:08:50 +00002392; SKYLAKE-LABEL: test_rcpss:
2393; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002394; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002395; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002396; SKYLAKE-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
2397; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002398; SKYLAKE-NEXT: retq # sched: [2:1.00]
2399;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002400; SKX-LABEL: test_rcpss:
2401; SKX: # BB#0:
2402; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002403; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002404; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002405; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2406; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002407;
Simon Pilgrim93986492017-04-18 19:04:40 +00002408; BTVER2-LABEL: test_rcpss:
2409; BTVER2: # BB#0:
2410; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
2411; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
2412; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
2413; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2414; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002415;
2416; ZNVER1-LABEL: test_rcpss:
2417; ZNVER1: # BB#0:
2418; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
2419; ZNVER1-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [12:0.50]
2420; ZNVER1-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [12:0.50]
2421; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002422; ZNVER1-NEXT: retq # sched: [1:0.50]
; Each scalar is placed in element 0 of an undef <4 x float> before the
; intrinsic call; the final fadd is a full-vector add, matching the addps/vaddps
; expectations above.
Simon Pilgrim93986492017-04-18 19:04:40 +00002423 %1 = insertelement <4 x float> undef, float %a0, i32 0
2424 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1)
2425 %3 = load float, float *%a1, align 4
2426 %4 = insertelement <4 x float> undef, float %3, i32 0
2427 %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
2428 %6 = fadd <4 x float> %2, %5
2429 ret <4 x float> %6
2430}
2431declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
2432
; Packed reciprocal-square-root estimate: @llvm.x86.sse.rsqrt.ps is applied once
; to the register argument %a0 and once to a vector loaded from %a1, and the two
; results are added. The assertions below pin the instruction sequence and the
; printed "sched: [latency:throughput]" annotation for each -mcpu run.
; Note: the skx expectations use vrsqrt14ps where the other AVX runs use vrsqrtps.
2433define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
2434; GENERIC-LABEL: test_rsqrtps:
2435; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002436; GENERIC-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
2437; GENERIC-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
2438; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2439; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002440;
2441; ATOM-LABEL: test_rsqrtps:
2442; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002443; ATOM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [10:5.00]
2444; ATOM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [9:4.50]
2445; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
2446; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
2447; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002448;
2449; SLM-LABEL: test_rsqrtps:
2450; SLM: # BB#0:
2451; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00]
2452; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00]
2453; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2454; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2455; SLM-NEXT: retq # sched: [4:1.00]
2456;
2457; SANDY-LABEL: test_rsqrtps:
2458; SANDY: # BB#0:
2459; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002460; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002461; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002462; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002463;
2464; HASWELL-LABEL: test_rsqrtps:
2465; HASWELL: # BB#0:
2466; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002467; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002468; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002469; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002470;
Gadi Haber767d98b2017-08-30 08:08:50 +00002471; SKYLAKE-LABEL: test_rsqrtps:
2472; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002473; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
2474; SKYLAKE-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [4:1.00]
2475; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002476; SKYLAKE-NEXT: retq # sched: [2:1.00]
2477;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002478; SKX-LABEL: test_rsqrtps:
2479; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002480; SKX-NEXT: vrsqrt14ps %xmm0, %xmm0 # sched: [4:1.00]
2481; SKX-NEXT: vrsqrt14ps (%rdi), %xmm1 # sched: [10:1.00]
2482; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2483; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002484;
Simon Pilgrim93986492017-04-18 19:04:40 +00002485; BTVER2-LABEL: test_rsqrtps:
2486; BTVER2: # BB#0:
2487; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
2488; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00]
2489; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2490; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002491;
2492; ZNVER1-LABEL: test_rsqrtps:
2493; ZNVER1: # BB#0:
2494; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50]
2495; ZNVER1-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:0.50]
2496; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002497; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source call, 16-byte-aligned vector load, memory-source call, then fadd.
Simon Pilgrim93986492017-04-18 19:04:40 +00002498 %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
2499 %2 = load <4 x float>, <4 x float> *%a1, align 16
2500 %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2)
2501 %4 = fadd <4 x float> %1, %3
2502 ret <4 x float> %4
2503}
2504declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
2505
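2506; TODO - rsqrtss_m: add coverage for the memory-operand form of rsqrtss; in test_rsqrtss below the scalar load is emitted as a separate movss and is not folded.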
2507
; Scalar reciprocal-square-root estimate: @llvm.x86.sse.rsqrt.ss is applied to a
; vector whose lane 0 is the float argument %a0 and to a vector whose lane 0 is a
; float loaded from %a1; the two vector results are added.
; In every expected sequence below the load appears as a separate (v)movss, so
; only the register-register form of (v)rsqrtss is exercised here.
; The atom expectations for rsqrtss carry no "sched:" annotation.
2508define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
2509; GENERIC-LABEL: test_rsqrtss:
2510; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002511; GENERIC-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
2512; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2513; GENERIC-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
2514; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2515; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002516;
2517; ATOM-LABEL: test_rsqrtss:
2518; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002519; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002520; ATOM-NEXT: rsqrtss %xmm0, %xmm0
2521; ATOM-NEXT: rsqrtss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002522; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2523; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002524;
2525; SLM-LABEL: test_rsqrtss:
2526; SLM: # BB#0:
2527; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
2528; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [8:1.00]
2529; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [8:1.00]
2530; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2531; SLM-NEXT: retq # sched: [4:1.00]
2532;
2533; SANDY-LABEL: test_rsqrtss:
2534; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002535; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2536; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2537; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002538; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002539; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002540;
2541; HASWELL-LABEL: test_rsqrtss:
2542; HASWELL: # BB#0:
2543; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002544; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002545; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2546; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002547; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002548;
Gadi Haber767d98b2017-08-30 08:08:50 +00002549; SKYLAKE-LABEL: test_rsqrtss:
2550; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002551; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002552; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002553; SKYLAKE-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
2554; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002555; SKYLAKE-NEXT: retq # sched: [2:1.00]
2556;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002557; SKX-LABEL: test_rsqrtss:
2558; SKX: # BB#0:
2559; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002560; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002561; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002562; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2563; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002564;
Simon Pilgrim93986492017-04-18 19:04:40 +00002565; BTVER2-LABEL: test_rsqrtss:
2566; BTVER2: # BB#0:
2567; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
2568; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
2569; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
2570; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2571; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002572;
2573; ZNVER1-LABEL: test_rsqrtss:
2574; ZNVER1: # BB#0:
2575; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002576; ZNVER1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:0.50]
2577; ZNVER1-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:0.50]
Craig Topper106b5b62017-07-19 02:45:14 +00002578; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002579; ZNVER1-NEXT: retq # sched: [1:0.50]
; Each scalar is placed in lane 0 of an otherwise-undef vector before the call.
Simon Pilgrim93986492017-04-18 19:04:40 +00002580 %1 = insertelement <4 x float> undef, float %a0, i32 0
2581 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1)
2582 %3 = load float, float *%a1, align 4
2583 %4 = insertelement <4 x float> undef, float %3, i32 0
2584 %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
2585 %6 = fadd <4 x float> %2, %5
2586 ret <4 x float> %6
2587}
2588declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
2589
; Store fence: the function body is a single @llvm.x86.sse.sfence call followed
; by ret, and every run is expected to emit one sfence instruction before retq.
; The atom expectations additionally contain six nop instructions between the
; sfence and the retq.
2590define void @test_sfence() {
2591; GENERIC-LABEL: test_sfence:
2592; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002593; GENERIC-NEXT: sfence # sched: [1:1.00]
2594; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002595;
2596; ATOM-LABEL: test_sfence:
2597; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002598; ATOM-NEXT: sfence # sched: [1:1.00]
2599; ATOM-NEXT: nop # sched: [1:0.50]
2600; ATOM-NEXT: nop # sched: [1:0.50]
2601; ATOM-NEXT: nop # sched: [1:0.50]
2602; ATOM-NEXT: nop # sched: [1:0.50]
2603; ATOM-NEXT: nop # sched: [1:0.50]
2604; ATOM-NEXT: nop # sched: [1:0.50]
2605; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002606;
2607; SLM-LABEL: test_sfence:
2608; SLM: # BB#0:
2609; SLM-NEXT: sfence # sched: [1:1.00]
2610; SLM-NEXT: retq # sched: [4:1.00]
2611;
2612; SANDY-LABEL: test_sfence:
2613; SANDY: # BB#0:
2614; SANDY-NEXT: sfence # sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002615; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002616;
2617; HASWELL-LABEL: test_sfence:
2618; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002619; HASWELL-NEXT: sfence # sched: [1:0.33]
2620; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002621;
Gadi Haber767d98b2017-08-30 08:08:50 +00002622; SKYLAKE-LABEL: test_sfence:
2623; SKYLAKE: # BB#0:
2624; SKYLAKE-NEXT: sfence # sched: [1:0.33]
2625; SKYLAKE-NEXT: retq # sched: [2:1.00]
2626;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002627; SKX-LABEL: test_sfence:
2628; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002629; SKX-NEXT: sfence # sched: [2:0.33]
2630; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002631;
Simon Pilgrim93986492017-04-18 19:04:40 +00002632; BTVER2-LABEL: test_sfence:
2633; BTVER2: # BB#0:
2634; BTVER2-NEXT: sfence # sched: [1:1.00]
2635; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002636;
2637; ZNVER1-LABEL: test_sfence:
2638; ZNVER1: # BB#0:
2639; ZNVER1-NEXT: sfence # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002640; ZNVER1-NEXT: retq # sched: [1:0.50]
; The call has no operands and no result; it must survive to the output as sfence.
Simon Pilgrim93986492017-04-18 19:04:40 +00002641 call void @llvm.x86.sse.sfence()
2642 ret void
2643}
; sfence is a store fence: it has an ordering side effect and returns nothing, so
; it must not be declared readnone (a readnone void call with no uses would be
; removable, yet test_sfence expects the instruction to be emitted).
2644declare void @llvm.x86.sse.sfence() nounwind
2645
; Two-source float shuffle: the first shufflevector combines %a0 and %a1 with
; mask <0,0,4,4>; the second combines that result with a vector loaded from %a2
; using mask <0,3,4,4>. Each run is expected to emit one register-register
; (v)shufps followed by one (v)shufps with a memory operand.
; The atom expectations additionally contain four nop instructions before retq.
2646define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
2647; GENERIC-LABEL: test_shufps:
2648; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002649; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2650; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2651; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002652;
2653; ATOM-LABEL: test_shufps:
2654; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002655; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2656; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2657; ATOM-NEXT: nop # sched: [1:0.50]
2658; ATOM-NEXT: nop # sched: [1:0.50]
2659; ATOM-NEXT: nop # sched: [1:0.50]
2660; ATOM-NEXT: nop # sched: [1:0.50]
2661; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002662;
2663; SLM-LABEL: test_shufps:
2664; SLM: # BB#0:
2665; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2666; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [4:1.00]
2667; SLM-NEXT: retq # sched: [4:1.00]
2668;
2669; SANDY-LABEL: test_shufps:
2670; SANDY: # BB#0:
2671; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002672; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2673; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002674;
2675; HASWELL-LABEL: test_shufps:
2676; HASWELL: # BB#0:
2677; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002678; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2679; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002680;
Gadi Haber767d98b2017-08-30 08:08:50 +00002681; SKYLAKE-LABEL: test_shufps:
2682; SKYLAKE: # BB#0:
2683; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2684; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2685; SKYLAKE-NEXT: retq # sched: [2:1.00]
2686;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002687; SKX-LABEL: test_shufps:
2688; SKX: # BB#0:
2689; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002690; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2691; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002692;
Simon Pilgrim93986492017-04-18 19:04:40 +00002693; BTVER2-LABEL: test_shufps:
2694; BTVER2: # BB#0:
2695; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2696; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
2697; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002698;
2699; ZNVER1-LABEL: test_shufps:
2700; ZNVER1: # BB#0:
2701; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2702; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002703; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 0-3 select from the first operand, 4-7 from the second.
Simon Pilgrim93986492017-04-18 19:04:40 +00002704 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
2705 %2 = load <4 x float>, <4 x float> *%a2, align 16
2706 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
2707 ret <4 x float> %3
2708}
2709
; Packed square root: @llvm.x86.sse.sqrt.ps is applied once to the register
; argument %a0 and once to a vector loaded from %a1, and the two results are
; added. Each run is expected to emit one register-source and one memory-source
; (v)sqrtps, with the printed "sched: [latency:throughput]" annotation pinned.
2710define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
2711; GENERIC-LABEL: test_sqrtps:
2712; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002713; GENERIC-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:1.00]
2714; GENERIC-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:1.00]
2715; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2716; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002717;
2718; ATOM-LABEL: test_sqrtps:
2719; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002720; ATOM-NEXT: sqrtps %xmm0, %xmm1 # sched: [70:35.00]
2721; ATOM-NEXT: sqrtps (%rdi), %xmm0 # sched: [70:35.00]
2722; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2723; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002724;
2725; SLM-LABEL: test_sqrtps:
2726; SLM: # BB#0:
2727; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [18:1.00]
2728; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [15:1.00]
2729; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2730; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2731; SLM-NEXT: retq # sched: [4:1.00]
2732;
2733; SANDY-LABEL: test_sqrtps:
2734; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002735; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
2736; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002737; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002738; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002739;
2740; HASWELL-LABEL: test_sqrtps:
2741; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002742; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
2743; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002744; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002745; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002746;
Gadi Haber767d98b2017-08-30 08:08:50 +00002747; SKYLAKE-LABEL: test_sqrtps:
2748; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002749; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
2750; SKYLAKE-NEXT: vsqrtps (%rdi), %xmm1 # sched: [12:1.00]
2751; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002752; SKYLAKE-NEXT: retq # sched: [2:1.00]
2753;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002754; SKX-LABEL: test_sqrtps:
2755; SKX: # BB#0:
2756; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002757; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:1.00]
2758; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2759; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002760;
Simon Pilgrim93986492017-04-18 19:04:40 +00002761; BTVER2-LABEL: test_sqrtps:
2762; BTVER2: # BB#0:
2763; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
2764; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00]
2765; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2766; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002767;
2768; ZNVER1-LABEL: test_sqrtps:
2769; ZNVER1: # BB#0:
2770; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:1.00]
2771; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:1.00]
2772; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002773; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source call, 16-byte-aligned vector load, memory-source call, then fadd.
Simon Pilgrim93986492017-04-18 19:04:40 +00002774 %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
2775 %2 = load <4 x float>, <4 x float> *%a1, align 16
2776 %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2)
2777 %4 = fadd <4 x float> %1, %3
2778 ret <4 x float> %4
2779}
2780declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
2781
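2782; TODO - sqrtss_m: add coverage for the memory-operand form of sqrtss; in test_sqrtss below the load is emitted as a separate movaps and is not folded.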
2783
; Scalar square root: @llvm.x86.sse.sqrt.ss is applied once to the vector
; argument %a0 and once to a full vector loaded from %a1; the results are added.
; In every expected sequence below the load appears as a separate (v)movaps, so
; only the register-register form of (v)sqrtss is exercised here.
; The atom expectations for sqrtss carry no "sched:" annotation.
2784define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
2785; GENERIC-LABEL: test_sqrtss:
2786; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002787; GENERIC-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:1.00]
2788; GENERIC-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
2789; GENERIC-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:1.00]
2790; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2791; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002792;
2793; ATOM-LABEL: test_sqrtss:
2794; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002795; ATOM-NEXT: movaps (%rdi), %xmm1 # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002796; ATOM-NEXT: sqrtss %xmm0, %xmm0
2797; ATOM-NEXT: sqrtss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002798; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2799; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002800;
2801; SLM-LABEL: test_sqrtss:
2802; SLM: # BB#0:
2803; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00]
2804; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [18:1.00]
2805; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [18:1.00]
2806; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2807; SLM-NEXT: retq # sched: [4:1.00]
2808;
2809; SANDY-LABEL: test_sqrtss:
2810; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002811; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
2812; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
2813; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002814; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002815; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002816;
2817; HASWELL-LABEL: test_sqrtss:
2818; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002819; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
2820; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
2821; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002822; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002823; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002824;
Gadi Haber767d98b2017-08-30 08:08:50 +00002825; SKYLAKE-LABEL: test_sqrtss:
2826; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002827; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002828; SKYLAKE-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002829; SKYLAKE-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00]
2830; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002831; SKYLAKE-NEXT: retq # sched: [2:1.00]
2832;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002833; SKX-LABEL: test_sqrtss:
2834; SKX: # BB#0:
2835; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002836; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002837; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002838; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2839; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002840;
Simon Pilgrim93986492017-04-18 19:04:40 +00002841; BTVER2-LABEL: test_sqrtss:
2842; BTVER2: # BB#0:
2843; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
2844; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [26:21.00]
2845; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [26:21.00]
2846; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2847; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002848;
2849; ZNVER1-LABEL: test_sqrtss:
2850; ZNVER1: # BB#0:
2851; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50]
2852; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [27:1.00]
2853; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [27:1.00]
2854; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002855; ZNVER1-NEXT: retq # sched: [1:0.50]
; The second operand is loaded as a whole 16-byte-aligned vector, not as a scalar.
Simon Pilgrim93986492017-04-18 19:04:40 +00002856 %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
2857 %2 = load <4 x float>, <4 x float> *%a1, align 16
2858 %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2)
2859 %4 = fadd <4 x float> %1, %3
2860 ret <4 x float> %4
2861}
2862declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
2863
; Store MXCSR: allocates an i32 stack slot, passes its address (as i8*) to
; @llvm.x86.sse.stmxcsr, then loads the slot and returns the value. Each run is
; expected to emit (v)stmxcsr to a negative %rsp-relative slot followed by a
; movl reload of the same kind of slot into %eax; the slot offsets are matched
; by regex rather than pinned.
2864define i32 @test_stmxcsr() {
2865; GENERIC-LABEL: test_stmxcsr:
2866; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002867; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
2868; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
2869; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002870;
2871; ATOM-LABEL: test_stmxcsr:
2872; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002873; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [15:7.50]
2874; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:1.00]
2875; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002876;
2877; SLM-LABEL: test_stmxcsr:
2878; SLM: # BB#0:
2879; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
2880; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
2881; SLM-NEXT: retq # sched: [4:1.00]
2882;
2883; SANDY-LABEL: test_stmxcsr:
2884; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002885; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
2886; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
2887; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002888;
2889; HASWELL-LABEL: test_stmxcsr:
2890; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002891; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
2892; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
2893; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002894;
Gadi Haber767d98b2017-08-30 08:08:50 +00002895; SKYLAKE-LABEL: test_stmxcsr:
2896; SKYLAKE: # BB#0:
2897; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
2898; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
2899; SKYLAKE-NEXT: retq # sched: [2:1.00]
2900;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002901; SKX-LABEL: test_stmxcsr:
2902; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002903; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
2904; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
2905; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002906;
Simon Pilgrim93986492017-04-18 19:04:40 +00002907; BTVER2-LABEL: test_stmxcsr:
2908; BTVER2: # BB#0:
2909; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
2910; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
2911; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002912;
2913; ZNVER1-LABEL: test_stmxcsr:
2914; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002915; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
Craig Topper106b5b62017-07-19 02:45:14 +00002916; ZNVER1-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002917; ZNVER1-NEXT: retq # sched: [1:0.50]
; The intrinsic takes an i8*, hence the bitcast of the i32 slot; the load reads
; back through the original i32 pointer.
Simon Pilgrim93986492017-04-18 19:04:40 +00002918 %1 = alloca i32, align 4
2919 %2 = bitcast i32* %1 to i8*
2920 call void @llvm.x86.sse.stmxcsr(i8* %2)
2921 %3 = load i32, i32* %1, align 4
2922 ret i32 %3
2923}
; stmxcsr stores the MXCSR value through its pointer argument (test_stmxcsr
; reads it back with a load), so the declaration must not claim readnone.
2924declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
2925
; Packed float subtract: computes (%a0 - %a1) and then subtracts a vector loaded
; from %a2 from that result. Each run is expected to emit one register-register
; (v)subps followed by one (v)subps with a memory operand, with the printed
; "sched: [latency:throughput]" annotation pinned.
2926define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2927; GENERIC-LABEL: test_subps:
2928; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002929; GENERIC-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
2930; GENERIC-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
2931; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002932;
2933; ATOM-LABEL: test_subps:
2934; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002935; ATOM-NEXT: subps %xmm1, %xmm0 # sched: [5:5.00]
2936; ATOM-NEXT: subps (%rdi), %xmm0 # sched: [5:5.00]
2937; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002938;
2939; SLM-LABEL: test_subps:
2940; SLM: # BB#0:
2941; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
2942; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00]
2943; SLM-NEXT: retq # sched: [4:1.00]
2944;
2945; SANDY-LABEL: test_subps:
2946; SANDY: # BB#0:
2947; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002948; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
2949; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002950;
2951; HASWELL-LABEL: test_subps:
2952; HASWELL: # BB#0:
2953; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002954; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
2955; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002956;
Gadi Haber767d98b2017-08-30 08:08:50 +00002957; SKYLAKE-LABEL: test_subps:
2958; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002959; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
2960; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002961; SKYLAKE-NEXT: retq # sched: [2:1.00]
2962;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002963; SKX-LABEL: test_subps:
2964; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002965; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2966; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2967; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002968;
Simon Pilgrim93986492017-04-18 19:04:40 +00002969; BTVER2-LABEL: test_subps:
2970; BTVER2: # BB#0:
2971; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2972; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
2973; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002974;
2975; ZNVER1-LABEL: test_subps:
2976; ZNVER1: # BB#0:
2977; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2978; ZNVER1-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002979; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register fsub first, then fsub with the 16-byte-aligned loaded operand.
Simon Pilgrim93986492017-04-18 19:04:40 +00002980 %1 = fsub <4 x float> %a0, %a1
2981 %2 = load <4 x float>, <4 x float> *%a2, align 16
2982 %3 = fsub <4 x float> %1, %2
2983 ret <4 x float> %3
2984}
2985
; test_subss: scalar float subtraction. %a0 - %a1 is computed first, then the
; float loaded from %a2 is subtracted from that result. Each per-CPU check block
; below expects one register-register (v)subss followed by one (v)subss with a
; (%rdi) memory operand, each line carrying its `# sched:` annotation.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2986define float @test_subss(float %a0, float %a1, float *%a2) {
2987; GENERIC-LABEL: test_subss:
2988; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002989; GENERIC-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
2990; GENERIC-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
2991; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002992;
2993; ATOM-LABEL: test_subss:
2994; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002995; ATOM-NEXT: subss %xmm1, %xmm0 # sched: [5:5.00]
2996; ATOM-NEXT: subss (%rdi), %xmm0 # sched: [5:5.00]
2997; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002998;
2999; SLM-LABEL: test_subss:
3000; SLM: # BB#0:
3001; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
3002; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00]
3003; SLM-NEXT: retq # sched: [4:1.00]
3004;
3005; SANDY-LABEL: test_subss:
3006; SANDY: # BB#0:
3007; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003008; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
3009; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003010;
3011; HASWELL-LABEL: test_subss:
3012; HASWELL: # BB#0:
3013; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003014; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
3015; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003016;
Gadi Haber767d98b2017-08-30 08:08:50 +00003017; SKYLAKE-LABEL: test_subss:
3018; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003019; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
3020; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00003021; SKYLAKE-NEXT: retq # sched: [2:1.00]
3022;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003023; SKX-LABEL: test_subss:
3024; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003025; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3026; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
3027; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003028;
Simon Pilgrim93986492017-04-18 19:04:40 +00003029; BTVER2-LABEL: test_subss:
3030; BTVER2: # BB#0:
3031; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3032; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3033; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003034;
3035; ZNVER1-LABEL: test_subss:
3036; ZNVER1: # BB#0:
3037; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3038; ZNVER1-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003039; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: fsub of the two arguments, then fsub of that result and a
; float loaded from %a2.
Simon Pilgrim93986492017-04-18 19:04:40 +00003040 %1 = fsub float %a0, %a1
3041 %2 = load float, float *%a2, align 4
3042 %3 = fsub float %1, %2
3043 ret float %3
3044}
3045
; test_ucomiss: calls @llvm.x86.sse.ucomieq.ss twice -- once on %a0 and %a1, and
; once on %a0 and a vector loaded from %a2 -- and returns the OR of the two i32
; results. Every per-CPU check block below expects the same shape: a (v)ucomiss
; on registers, setnp/sete/andb, a (v)ucomiss with a (%rdi) memory operand,
; a second setnp/sete/andb, then orb and movzbl before retq.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3046define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3047; GENERIC-LABEL: test_ucomiss:
3048; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003049; GENERIC-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +00003050; GENERIC-NEXT: setnp %al # sched: [1:0.50]
3051; GENERIC-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim84846982017-08-01 15:14:35 +00003052; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
3053; GENERIC-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +00003054; GENERIC-NEXT: setnp %al # sched: [1:0.50]
3055; GENERIC-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim84846982017-08-01 15:14:35 +00003056; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
3057; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
3058; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
3059; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003060;
3061; ATOM-LABEL: test_ucomiss:
3062; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003063; ATOM-NEXT: ucomiss %xmm1, %xmm0 # sched: [9:4.50]
3064; ATOM-NEXT: setnp %al # sched: [1:0.50]
3065; ATOM-NEXT: sete %cl # sched: [1:0.50]
3066; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
3067; ATOM-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:5.00]
3068; ATOM-NEXT: setnp %al # sched: [1:0.50]
3069; ATOM-NEXT: sete %dl # sched: [1:0.50]
3070; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
3071; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
3072; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
3073; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003074;
3075; SLM-LABEL: test_ucomiss:
3076; SLM: # BB#0:
3077; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
3078; SLM-NEXT: setnp %al # sched: [1:0.50]
3079; SLM-NEXT: sete %cl # sched: [1:0.50]
3080; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
3081; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00]
3082; SLM-NEXT: setnp %al # sched: [1:0.50]
3083; SLM-NEXT: sete %dl # sched: [1:0.50]
3084; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
3085; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
3086; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
3087; SLM-NEXT: retq # sched: [4:1.00]
3088;
3089; SANDY-LABEL: test_ucomiss:
3090; SANDY: # BB#0:
3091; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +00003092; SANDY-NEXT: setnp %al # sched: [1:0.50]
3093; SANDY-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003094; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
3095; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +00003096; SANDY-NEXT: setnp %al # sched: [1:0.50]
3097; SANDY-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003098; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
3099; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
3100; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003101; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003102;
3103; HASWELL-LABEL: test_ucomiss:
3104; HASWELL: # BB#0:
3105; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00003106; HASWELL-NEXT: setnp %al # sched: [1:0.50]
3107; HASWELL-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003108; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
3109; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00003110; HASWELL-NEXT: setnp %al # sched: [1:0.50]
3111; HASWELL-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003112; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
3113; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
3114; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003115; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003116;
Gadi Haber767d98b2017-08-30 08:08:50 +00003117; SKYLAKE-LABEL: test_ucomiss:
3118; SKYLAKE: # BB#0:
3119; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003120; SKYLAKE-NEXT: setnp %al # sched: [1:1.00]
3121; SKYLAKE-NEXT: sete %cl # sched: [1:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003122; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003123; SKYLAKE-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
3124; SKYLAKE-NEXT: setnp %al # sched: [1:1.00]
3125; SKYLAKE-NEXT: sete %dl # sched: [1:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003126; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
3127; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
3128; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
3129; SKYLAKE-NEXT: retq # sched: [2:1.00]
3130;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003131; SKX-LABEL: test_ucomiss:
3132; SKX: # BB#0:
3133; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003134; SKX-NEXT: setnp %al # sched: [1:0.50]
3135; SKX-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003136; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
3137; SKX-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003138; SKX-NEXT: setnp %al # sched: [1:0.50]
3139; SKX-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003140; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
3141; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
3142; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00003143; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003144;
Simon Pilgrim93986492017-04-18 19:04:40 +00003145; BTVER2-LABEL: test_ucomiss:
3146; BTVER2: # BB#0:
3147; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3148; BTVER2-NEXT: setnp %al # sched: [1:0.50]
3149; BTVER2-NEXT: sete %cl # sched: [1:0.50]
3150; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
3151; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
3152; BTVER2-NEXT: setnp %al # sched: [1:0.50]
3153; BTVER2-NEXT: sete %dl # sched: [1:0.50]
3154; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
3155; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
3156; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
3157; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003158;
3159; ZNVER1-LABEL: test_ucomiss:
3160; ZNVER1: # BB#0:
3161; ZNVER1-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3162; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
3163; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
3164; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
3165; ZNVER1-NEXT: vucomiss (%rdi), %xmm0 # sched: [10:1.00]
3166; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
3167; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
3168; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
3169; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
3170; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003171; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test. The <4 x float> load below is marked `align 4`, unlike the
; `align 16` vector loads in the other tests in this part of the file.
; NOTE(review): presumably deliberate, since the checks above expect the load
; folded into the compare as a (%rdi) operand -- confirm before changing it.
Simon Pilgrim93986492017-04-18 19:04:40 +00003172 %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
3173 %2 = load <4 x float>, <4 x float> *%a2, align 4
3174 %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
3175 %4 = or i32 %1, %3
3176 ret i32 %4
3177}
; Declaration of the intrinsic called by test_ucomiss above.
3178declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
3179
; test_unpckhps: two shufflevectors with mask <2, 6, 3, 7> -- the first on %a0
; and %a1, the second on that result and a vector loaded from %a2. Each per-CPU
; check block below expects a register-register (v)unpckhps followed by one whose
; second source is `mem`; the text before the shuffle comment is matched with the
; `{{.*#+}}` FileCheck regex instead of literal operands.
; The ATOM block additionally expects four nop instructions before retq.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3180define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3181; GENERIC-LABEL: test_unpckhps:
3182; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003183; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3184; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3185; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003186;
3187; ATOM-LABEL: test_unpckhps:
3188; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003189; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3190; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
3191; ATOM-NEXT: nop # sched: [1:0.50]
3192; ATOM-NEXT: nop # sched: [1:0.50]
3193; ATOM-NEXT: nop # sched: [1:0.50]
3194; ATOM-NEXT: nop # sched: [1:0.50]
3195; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003196;
3197; SLM-LABEL: test_unpckhps:
3198; SLM: # BB#0:
3199; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3200; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
3201; SLM-NEXT: retq # sched: [4:1.00]
3202;
3203; SANDY-LABEL: test_unpckhps:
3204; SANDY: # BB#0:
3205; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003206; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3207; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003208;
3209; HASWELL-LABEL: test_unpckhps:
3210; HASWELL: # BB#0:
3211; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003212; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
3213; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003214;
Gadi Haber767d98b2017-08-30 08:08:50 +00003215; SKYLAKE-LABEL: test_unpckhps:
3216; SKYLAKE: # BB#0:
3217; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3218; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
3219; SKYLAKE-NEXT: retq # sched: [2:1.00]
3220;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003221; SKX-LABEL: test_unpckhps:
3222; SKX: # BB#0:
3223; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003224; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3225; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003226;
Simon Pilgrim93986492017-04-18 19:04:40 +00003227; BTVER2-LABEL: test_unpckhps:
3228; BTVER2: # BB#0:
3229; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
3230; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
3231; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003232;
3233; ZNVER1-LABEL: test_unpckhps:
3234; ZNVER1: # BB#0:
3235; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
3236; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003237; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: mask <2, 6, 3, 7> interleaves elements 2 and 3 of the first
; operand with elements 2 and 3 of the second operand.
Simon Pilgrim93986492017-04-18 19:04:40 +00003238 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
3239 %2 = load <4 x float>, <4 x float> *%a2, align 16
3240 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
3241 ret <4 x float> %3
3242}
3243
; test_unpcklps: two shufflevectors with mask <0, 4, 1, 5> -- the first on %a0
; and %a1, the second on that result and a vector loaded from %a2. Each per-CPU
; check block below expects a register-register (v)unpcklps followed by one whose
; second source is `mem`; the text before the shuffle comment is matched with the
; `{{.*#+}}` FileCheck regex instead of literal operands.
; The ATOM block additionally expects four nop instructions before retq.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3244define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3245; GENERIC-LABEL: test_unpcklps:
3246; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003247; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3248; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3249; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003250;
3251; ATOM-LABEL: test_unpcklps:
3252; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003253; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3254; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
3255; ATOM-NEXT: nop # sched: [1:0.50]
3256; ATOM-NEXT: nop # sched: [1:0.50]
3257; ATOM-NEXT: nop # sched: [1:0.50]
3258; ATOM-NEXT: nop # sched: [1:0.50]
3259; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003260;
3261; SLM-LABEL: test_unpcklps:
3262; SLM: # BB#0:
3263; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3264; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [4:1.00]
3265; SLM-NEXT: retq # sched: [4:1.00]
3266;
3267; SANDY-LABEL: test_unpcklps:
3268; SANDY: # BB#0:
3269; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003270; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3271; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003272;
3273; HASWELL-LABEL: test_unpcklps:
3274; HASWELL: # BB#0:
3275; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003276; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
3277; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003278;
Gadi Haber767d98b2017-08-30 08:08:50 +00003279; SKYLAKE-LABEL: test_unpcklps:
3280; SKYLAKE: # BB#0:
3281; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3282; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
3283; SKYLAKE-NEXT: retq # sched: [2:1.00]
3284;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003285; SKX-LABEL: test_unpcklps:
3286; SKX: # BB#0:
3287; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003288; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3289; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003290;
Simon Pilgrim93986492017-04-18 19:04:40 +00003291; BTVER2-LABEL: test_unpcklps:
3292; BTVER2: # BB#0:
3293; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
3294; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
3295; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003296;
3297; ZNVER1-LABEL: test_unpcklps:
3298; ZNVER1: # BB#0:
3299; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
3300; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003301; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: mask <0, 4, 1, 5> interleaves elements 0 and 1 of the first
; operand with elements 0 and 1 of the second operand.
Simon Pilgrim93986492017-04-18 19:04:40 +00003302 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
3303 %2 = load <4 x float>, <4 x float> *%a2, align 16
3304 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
3305 ret <4 x float> %3
3306}
3307
; test_xorps: bitwise XOR of float vectors, written as xor on <4 x i32> bitcasts.
; %a0 is XORed with %a1, then with a vector loaded from %a2, and the result is
; bitcast back to <4 x float>. Each per-CPU check block below expects a
; register-register (v)xorps followed by one with a (%rdi) memory operand, each
; line carrying its `# sched:` annotation.
; The ATOM block additionally expects four nop instructions before retq.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3308define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3309; GENERIC-LABEL: test_xorps:
3310; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003311; GENERIC-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
3312; GENERIC-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
3313; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003314;
3315; ATOM-LABEL: test_xorps:
3316; ATOM: # BB#0:
Simon Pilgrim486072d2017-08-01 17:51:20 +00003317; ATOM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
3318; ATOM-NEXT: xorps (%rdi), %xmm0 # sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003319; ATOM-NEXT: nop # sched: [1:0.50]
3320; ATOM-NEXT: nop # sched: [1:0.50]
3321; ATOM-NEXT: nop # sched: [1:0.50]
3322; ATOM-NEXT: nop # sched: [1:0.50]
3323; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003324;
3325; SLM-LABEL: test_xorps:
3326; SLM: # BB#0:
3327; SLM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
3328; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00]
3329; SLM-NEXT: retq # sched: [4:1.00]
3330;
3331; SANDY-LABEL: test_xorps:
3332; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00003333; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3334; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3335; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003336;
3337; HASWELL-LABEL: test_xorps:
3338; HASWELL: # BB#0:
3339; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003340; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
3341; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003342;
Gadi Haber767d98b2017-08-30 08:08:50 +00003343; SKYLAKE-LABEL: test_xorps:
3344; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003345; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
3346; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00003347; SKYLAKE-NEXT: retq # sched: [2:1.00]
3348;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003349; SKX-LABEL: test_xorps:
3350; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003351; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
3352; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
3353; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003354;
Simon Pilgrim93986492017-04-18 19:04:40 +00003355; BTVER2-LABEL: test_xorps:
3356; BTVER2: # BB#0:
3357; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
3358; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
3359; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003360;
3361; ZNVER1-LABEL: test_xorps:
3362; ZNVER1: # BB#0:
3363; ZNVER1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
3364; ZNVER1-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003365; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the float vectors are bitcast to <4 x i32> so that the integer
; xor instruction can be applied, then the result is bitcast back.
Simon Pilgrim93986492017-04-18 19:04:40 +00003366 %1 = bitcast <4 x float> %a0 to <4 x i32>
3367 %2 = bitcast <4 x float> %a1 to <4 x i32>
3368 %3 = xor <4 x i32> %1, %2
3369 %4 = load <4 x float>, <4 x float> *%a2, align 16
3370 %5 = bitcast <4 x float> %4 to <4 x i32>
3371 %6 = xor <4 x i32> %3, %5
3372 %7 = bitcast <4 x i32> %6 to <4 x float>
3373 ret <4 x float> %7
3374}
3375
; Metadata node !0 holding the single operand i32 1.
; NOTE(review): no use of !0 is visible in this part of the file; presumably it
; is attached to an instruction in an earlier test -- confirm before removing.
3376!0 = !{i32 1}