blob: d3c995197e83b849cf26ef8abedac2db216b0d50 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
Simon Pilgrim93986492017-04-18 19:04:40 +000013
; test_addps: packed single-precision add. The IR adds %a0 and %a1, then adds
; a <4 x float> loaded from %a2 (align 16). Every per-CPU expectation group
; below contains one register-register and one register-memory (v)addps
; followed by retq.
; NOTE(review): the "# sched: [a:b]" annotations are emitted by -print-schedule
; and are presumably [latency:reciprocal-throughput] -- confirm before relying
; on that reading. The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
14define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
15; GENERIC-LABEL: test_addps:
16; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +000017; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
18; GENERIC-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
19; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000020;
21; ATOM-LABEL: test_addps:
22; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +000023; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
24; ATOM-NEXT: addps (%rdi), %xmm0 # sched: [5:5.00]
25; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +000026;
27; SLM-LABEL: test_addps:
28; SLM: # BB#0:
29; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
30; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00]
31; SLM-NEXT: retq # sched: [4:1.00]
32;
33; SANDY-LABEL: test_addps:
34; SANDY: # BB#0:
35; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +000036; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
37; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000038;
39; HASWELL-LABEL: test_addps:
40; HASWELL: # BB#0:
41; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +000042; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
43; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000044;
Gadi Haber85d99b42017-10-17 13:45:39 +000045; BROADWELL-LABEL: test_addps:
46; BROADWELL: # BB#0:
47; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +000048; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
49; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +000050;
Gadi Haber767d98b2017-08-30 08:08:50 +000051; SKYLAKE-LABEL: test_addps:
52; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +000053; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +000054; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
55; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +000056;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000057; SKX-LABEL: test_addps:
58; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +000059; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
60; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
61; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000062;
Simon Pilgrim93986492017-04-18 19:04:40 +000063; BTVER2-LABEL: test_addps:
64; BTVER2: # BB#0:
65; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
66; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
67; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +000068;
69; ZNVER1-LABEL: test_addps:
70; ZNVER1: # BB#0:
71; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
72; ZNVER1-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +000073; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +000074 %1 = fadd <4 x float> %a0, %a1
75 %2 = load <4 x float>, <4 x float> *%a2, align 16
76 %3 = fadd <4 x float> %1, %2
77 ret <4 x float> %3
78}
79
; test_addss: scalar single-precision add. The IR adds %a0 and %a1, then adds
; a float loaded from %a2 (align 4). Every per-CPU expectation group below
; contains one register-register and one register-memory (v)addss followed by
; retq.
; NOTE(review): the "# sched: [a:b]" annotations are emitted by -print-schedule
; and are presumably [latency:reciprocal-throughput] -- confirm. The assertions
; are autogenerated; regenerate them with utils/update_llc_test_checks.py.
80define float @test_addss(float %a0, float %a1, float *%a2) {
81; GENERIC-LABEL: test_addss:
82; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +000083; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
84; GENERIC-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
85; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +000086;
87; ATOM-LABEL: test_addss:
88; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +000089; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
90; ATOM-NEXT: addss (%rdi), %xmm0 # sched: [5:5.00]
91; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +000092;
93; SLM-LABEL: test_addss:
94; SLM: # BB#0:
95; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
96; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00]
97; SLM-NEXT: retq # sched: [4:1.00]
98;
99; SANDY-LABEL: test_addss:
100; SANDY: # BB#0:
101; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000102; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
103; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000104;
105; HASWELL-LABEL: test_addss:
106; HASWELL: # BB#0:
107; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000108; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
109; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000110;
Gadi Haber85d99b42017-10-17 13:45:39 +0000111; BROADWELL-LABEL: test_addss:
112; BROADWELL: # BB#0:
113; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000114; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
115; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000116;
Gadi Haber767d98b2017-08-30 08:08:50 +0000117; SKYLAKE-LABEL: test_addss:
118; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000119; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000120; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
121; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000122;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000123; SKX-LABEL: test_addss:
124; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000125; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
126; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
127; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000128;
Simon Pilgrim93986492017-04-18 19:04:40 +0000129; BTVER2-LABEL: test_addss:
130; BTVER2: # BB#0:
131; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
132; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
133; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000134;
135; ZNVER1-LABEL: test_addss:
136; ZNVER1: # BB#0:
137; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
138; ZNVER1-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000139; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000140 %1 = fadd float %a0, %a1
141 %2 = load float, float *%a2, align 4
142 %3 = fadd float %1, %2
143 ret float %3
144}
145
; test_andps: bitwise AND on packed floats. The IR bitcasts %a0 and %a1 to
; <4 x i32>, ANDs them, ANDs the result with a vector loaded from %a2
; (align 16), and bitcasts back to <4 x float>. Every per-CPU expectation group
; contains one register-register and one register-memory (v)andps; the ATOM
; group additionally expects four nop instructions before retq.
; NOTE(review): the "# sched: [a:b]" annotations are emitted by -print-schedule
; and are presumably [latency:reciprocal-throughput] -- confirm. The assertions
; are autogenerated; regenerate them with utils/update_llc_test_checks.py.
146define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
147; GENERIC-LABEL: test_andps:
148; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000149; GENERIC-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
150; GENERIC-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
151; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000152;
153; ATOM-LABEL: test_andps:
154; ATOM: # BB#0:
Simon Pilgrim486072d2017-08-01 17:51:20 +0000155; ATOM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
156; ATOM-NEXT: andps (%rdi), %xmm0 # sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000157; ATOM-NEXT: nop # sched: [1:0.50]
158; ATOM-NEXT: nop # sched: [1:0.50]
159; ATOM-NEXT: nop # sched: [1:0.50]
160; ATOM-NEXT: nop # sched: [1:0.50]
161; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000162;
163; SLM-LABEL: test_andps:
164; SLM: # BB#0:
165; SLM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
166; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00]
167; SLM-NEXT: retq # sched: [4:1.00]
168;
169; SANDY-LABEL: test_andps:
170; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000171; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
172; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
173; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000174;
175; HASWELL-LABEL: test_andps:
176; HASWELL: # BB#0:
177; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000178; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
179; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000180;
Gadi Haber85d99b42017-10-17 13:45:39 +0000181; BROADWELL-LABEL: test_andps:
182; BROADWELL: # BB#0:
183; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000184; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
185; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000186;
Gadi Haber767d98b2017-08-30 08:08:50 +0000187; SKYLAKE-LABEL: test_andps:
188; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000189; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
190; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
191; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000192;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000193; SKX-LABEL: test_andps:
194; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000195; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
196; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
197; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000198;
Simon Pilgrim93986492017-04-18 19:04:40 +0000199; BTVER2-LABEL: test_andps:
200; BTVER2: # BB#0:
201; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
202; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
203; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000204;
205; ZNVER1-LABEL: test_andps:
206; ZNVER1: # BB#0:
207; ZNVER1-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
208; ZNVER1-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000209; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000210 %1 = bitcast <4 x float> %a0 to <4 x i32>
211 %2 = bitcast <4 x float> %a1 to <4 x i32>
212 %3 = and <4 x i32> %1, %2
213 %4 = load <4 x float>, <4 x float> *%a2, align 16
214 %5 = bitcast <4 x float> %4 to <4 x i32>
215 %6 = and <4 x i32> %3, %5
216 %7 = bitcast <4 x i32> %6 to <4 x float>
217 ret <4 x float> %7
218}
219
; test_andnotps: bitwise AND-NOT on packed floats. The IR computes
; (~%a0 & %a1) on the <4 x i32> bitcasts, then ANDs the complement of that
; result with a vector loaded from %a2 (align 16), and bitcasts back to
; <4 x float>. Every per-CPU expectation group contains one register-register
; and one register-memory (v)andnps; the ATOM group additionally expects four
; nop instructions before retq.
; NOTE(review): the "# sched: [a:b]" annotations are emitted by -print-schedule
; and are presumably [latency:reciprocal-throughput] -- confirm. The assertions
; are autogenerated; regenerate them with utils/update_llc_test_checks.py.
220define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
221; GENERIC-LABEL: test_andnotps:
222; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000223; GENERIC-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
224; GENERIC-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
225; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000226;
227; ATOM-LABEL: test_andnotps:
228; ATOM: # BB#0:
Simon Pilgrim486072d2017-08-01 17:51:20 +0000229; ATOM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
230; ATOM-NEXT: andnps (%rdi), %xmm0 # sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000231; ATOM-NEXT: nop # sched: [1:0.50]
232; ATOM-NEXT: nop # sched: [1:0.50]
233; ATOM-NEXT: nop # sched: [1:0.50]
234; ATOM-NEXT: nop # sched: [1:0.50]
235; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000236;
237; SLM-LABEL: test_andnotps:
238; SLM: # BB#0:
239; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
240; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00]
241; SLM-NEXT: retq # sched: [4:1.00]
242;
243; SANDY-LABEL: test_andnotps:
244; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000245; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
246; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
247; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000248;
249; HASWELL-LABEL: test_andnotps:
250; HASWELL: # BB#0:
251; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000252; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
253; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000254;
Gadi Haber85d99b42017-10-17 13:45:39 +0000255; BROADWELL-LABEL: test_andnotps:
256; BROADWELL: # BB#0:
257; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000258; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
259; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000260;
Gadi Haber767d98b2017-08-30 08:08:50 +0000261; SKYLAKE-LABEL: test_andnotps:
262; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000263; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
264; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
265; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000266;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000267; SKX-LABEL: test_andnotps:
268; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000269; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
270; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
271; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000272;
Simon Pilgrim93986492017-04-18 19:04:40 +0000273; BTVER2-LABEL: test_andnotps:
274; BTVER2: # BB#0:
275; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
276; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
277; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000278;
279; ZNVER1-LABEL: test_andnotps:
280; ZNVER1: # BB#0:
281; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
282; ZNVER1-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000283; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000284 %1 = bitcast <4 x float> %a0 to <4 x i32>
285 %2 = bitcast <4 x float> %a1 to <4 x i32>
286 %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
287 %4 = and <4 x i32> %3, %2
288 %5 = load <4 x float>, <4 x float> *%a2, align 16
289 %6 = bitcast <4 x float> %5 to <4 x i32>
290 %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
291 %8 = and <4 x i32> %6, %7
292 %9 = bitcast <4 x i32> %8 to <4 x float>
293 ret <4 x float> %9
294}
295
; test_cmpps: packed single-precision ordered-equal compare. The IR compares
; %a0 with %a1 and %a0 with a vector loaded from %a2 (align 16) using
; fcmp oeq, ORs the two <4 x i1> results, sign-extends to <4 x i32> and
; bitcasts to <4 x float>. Most expectation groups contain two (v)cmpeqps and
; a (v)orps; the SKX group instead compares into mask registers %k0/%k1,
; combines them with korw and expands with vpmovm2d (which carries no
; "# sched:" annotation in the expected output).
; NOTE(review): the "# sched: [a:b]" annotations are emitted by -print-schedule
; and are presumably [latency:reciprocal-throughput] -- confirm. The assertions
; are autogenerated; regenerate them with utils/update_llc_test_checks.py.
296define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
297; GENERIC-LABEL: test_cmpps:
298; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000299; GENERIC-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
300; GENERIC-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
301; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
302; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000303;
304; ATOM-LABEL: test_cmpps:
305; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000306; ATOM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [5:5.00]
307; ATOM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [5:5.00]
Simon Pilgrim486072d2017-08-01 17:51:20 +0000308; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000309; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000310;
311; SLM-LABEL: test_cmpps:
312; SLM: # BB#0:
313; SLM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
314; SLM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
315; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
316; SLM-NEXT: retq # sched: [4:1.00]
317;
318; SANDY-LABEL: test_cmpps:
319; SANDY: # BB#0:
320; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000321; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
322; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
323; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000324;
325; HASWELL-LABEL: test_cmpps:
326; HASWELL: # BB#0:
327; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000328; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000329; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000330; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000331;
Gadi Haber85d99b42017-10-17 13:45:39 +0000332; BROADWELL-LABEL: test_cmpps:
333; BROADWELL: # BB#0:
334; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000335; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000336; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000337; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000338;
Gadi Haber767d98b2017-08-30 08:08:50 +0000339; SKYLAKE-LABEL: test_cmpps:
340; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000341; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000342; SKYLAKE-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
343; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
344; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000345;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000346; SKX-LABEL: test_cmpps:
347; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000348; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # sched: [3:1.00]
349; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %k1 # sched: [9:1.00]
350; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000351; SKX-NEXT: vpmovm2d %k0, %xmm0
Gadi Haber684944b2017-10-08 12:52:54 +0000352; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000353;
Simon Pilgrim93986492017-04-18 19:04:40 +0000354; BTVER2-LABEL: test_cmpps:
355; BTVER2: # BB#0:
356; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
357; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
358; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
359; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000360;
361; ZNVER1-LABEL: test_cmpps:
362; ZNVER1: # BB#0:
363; ZNVER1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
364; ZNVER1-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
365; ZNVER1-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000366; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000367 %1 = fcmp oeq <4 x float> %a0, %a1
368 %2 = load <4 x float>, <4 x float> *%a2, align 16
369 %3 = fcmp oeq <4 x float> %a0, %2
370 %4 = or <4 x i1> %1, %3
371 %5 = sext <4 x i1> %4 to <4 x i32>
372 %6 = bitcast <4 x i32> %5 to <4 x float>
373 ret <4 x float> %6
374}
375
; test_cmpss: scalar single-precision compare through the
; @llvm.x86.sse.cmp.ss intrinsic with immediate i8 0. The IR inserts %a0 and
; %a1 into lane 0 of undef vectors, compares them, then compares that result
; against a float loaded from %a2 (align 4) and returns lane 0. Every per-CPU
; expectation group contains one register-register and one register-memory
; (v)cmpeqss followed by retq.
; NOTE(review): the "# sched: [a:b]" annotations are emitted by -print-schedule
; and are presumably [latency:reciprocal-throughput] -- confirm. The assertions
; are autogenerated; regenerate them with utils/update_llc_test_checks.py.
376define float @test_cmpss(float %a0, float %a1, float *%a2) {
377; GENERIC-LABEL: test_cmpss:
378; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000379; GENERIC-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
380; GENERIC-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
381; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000382;
383; ATOM-LABEL: test_cmpss:
384; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000385; ATOM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [5:5.00]
386; ATOM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [5:5.00]
387; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000388;
389; SLM-LABEL: test_cmpss:
390; SLM: # BB#0:
391; SLM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
392; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
393; SLM-NEXT: retq # sched: [4:1.00]
394;
395; SANDY-LABEL: test_cmpss:
396; SANDY: # BB#0:
397; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
398; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000399; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000400;
401; HASWELL-LABEL: test_cmpss:
402; HASWELL: # BB#0:
403; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
404; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000405; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000406;
Gadi Haber85d99b42017-10-17 13:45:39 +0000407; BROADWELL-LABEL: test_cmpss:
408; BROADWELL: # BB#0:
409; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000410; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
411; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000412;
Gadi Haber767d98b2017-08-30 08:08:50 +0000413; SKYLAKE-LABEL: test_cmpss:
414; SKYLAKE: # BB#0:
415; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000416; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000417; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000418;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000419; SKX-LABEL: test_cmpss:
420; SKX: # BB#0:
421; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
422; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000423; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000424;
Simon Pilgrim93986492017-04-18 19:04:40 +0000425; BTVER2-LABEL: test_cmpss:
426; BTVER2: # BB#0:
427; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
428; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
429; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000430;
431; ZNVER1-LABEL: test_cmpss:
432; ZNVER1: # BB#0:
433; ZNVER1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
434; ZNVER1-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000435; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000436 %1 = insertelement <4 x float> undef, float %a0, i32 0
437 %2 = insertelement <4 x float> undef, float %a1, i32 0
438 %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0)
439 %4 = load float, float *%a2, align 4
440 %5 = insertelement <4 x float> undef, float %4, i32 0
441 %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0)
442 %7 = extractelement <4 x float> %6, i32 0
443 ret float %7
444}
; Intrinsic used by test_cmpss above; the i8 operand is the comparison
; predicate immediate (both calls pass 0, and the expected output is cmpeqss).
445declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
446
; test_comiss: scalar ordered compare setting EFLAGS, through the
; @llvm.x86.sse.comieq.ss intrinsic. The IR compares %a0 with %a1 and %a0 with
; a vector loaded from %a2 (declared align 4), then ORs the two i32 results.
; Every per-CPU expectation group contains, for each (v)comiss, a setnp/sete
; pair combined with andb; the two partial results are merged with orb and
; zero-extended into %eax with movzbl.
; NOTE(review): the "# sched: [a:b]" annotations are emitted by -print-schedule
; and are presumably [latency:reciprocal-throughput] -- confirm. The assertions
; are autogenerated; regenerate them with utils/update_llc_test_checks.py.
447define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
448; GENERIC-LABEL: test_comiss:
449; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000450; GENERIC-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +0000451; GENERIC-NEXT: setnp %al # sched: [1:0.50]
452; GENERIC-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim84846982017-08-01 15:14:35 +0000453; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
454; GENERIC-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +0000455; GENERIC-NEXT: setnp %al # sched: [1:0.50]
456; GENERIC-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim84846982017-08-01 15:14:35 +0000457; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
458; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
459; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
460; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000461;
462; ATOM-LABEL: test_comiss:
463; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000464; ATOM-NEXT: comiss %xmm1, %xmm0 # sched: [9:4.50]
465; ATOM-NEXT: setnp %al # sched: [1:0.50]
466; ATOM-NEXT: sete %cl # sched: [1:0.50]
467; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
468; ATOM-NEXT: comiss (%rdi), %xmm0 # sched: [10:5.00]
469; ATOM-NEXT: setnp %al # sched: [1:0.50]
470; ATOM-NEXT: sete %dl # sched: [1:0.50]
471; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
472; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
473; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
474; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000475;
476; SLM-LABEL: test_comiss:
477; SLM: # BB#0:
478; SLM-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
479; SLM-NEXT: setnp %al # sched: [1:0.50]
480; SLM-NEXT: sete %cl # sched: [1:0.50]
481; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
482; SLM-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00]
483; SLM-NEXT: setnp %al # sched: [1:0.50]
484; SLM-NEXT: sete %dl # sched: [1:0.50]
485; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
486; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
487; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
488; SLM-NEXT: retq # sched: [4:1.00]
489;
490; SANDY-LABEL: test_comiss:
491; SANDY: # BB#0:
492; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +0000493; SANDY-NEXT: setnp %al # sched: [1:0.50]
494; SANDY-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000495; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
496; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +0000497; SANDY-NEXT: setnp %al # sched: [1:0.50]
498; SANDY-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000499; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
500; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
501; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000502; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000503;
504; HASWELL-LABEL: test_comiss:
505; HASWELL: # BB#0:
506; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000507; HASWELL-NEXT: setnp %al # sched: [1:0.50]
508; HASWELL-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000509; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
510; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +0000511; HASWELL-NEXT: setnp %al # sched: [1:0.50]
512; HASWELL-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000513; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
514; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
515; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000516; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000517;
Gadi Haber85d99b42017-10-17 13:45:39 +0000518; BROADWELL-LABEL: test_comiss:
519; BROADWELL: # BB#0:
520; BROADWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
521; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
522; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
523; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +0000524; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000525; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
526; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
527; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
528; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
529; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +0000530; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000531;
Gadi Haber767d98b2017-08-30 08:08:50 +0000532; SKYLAKE-LABEL: test_comiss:
533; SKYLAKE: # BB#0:
534; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000535; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
536; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +0000537; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000538; SKYLAKE-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000539; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
540; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +0000541; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
542; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
543; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000544; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000545;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000546; SKX-LABEL: test_comiss:
547; SKX: # BB#0:
548; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000549; SKX-NEXT: setnp %al # sched: [1:0.50]
550; SKX-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000551; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
552; SKX-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000553; SKX-NEXT: setnp %al # sched: [1:0.50]
554; SKX-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000555; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
556; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
557; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +0000558; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000559;
Simon Pilgrim93986492017-04-18 19:04:40 +0000560; BTVER2-LABEL: test_comiss:
561; BTVER2: # BB#0:
562; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
563; BTVER2-NEXT: setnp %al # sched: [1:0.50]
564; BTVER2-NEXT: sete %cl # sched: [1:0.50]
565; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
566; BTVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
567; BTVER2-NEXT: setnp %al # sched: [1:0.50]
568; BTVER2-NEXT: sete %dl # sched: [1:0.50]
569; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
570; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
571; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
572; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000573;
574; ZNVER1-LABEL: test_comiss:
575; ZNVER1: # BB#0:
576; ZNVER1-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
577; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
578; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
579; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
580; ZNVER1-NEXT: vcomiss (%rdi), %xmm0 # sched: [10:1.00]
581; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
582; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
583; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
584; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
585; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000586; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000587 %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
588 %2 = load <4 x float>, <4 x float> *%a2, align 4
589 %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2)
590 %4 = or i32 %1, %3
591 ret i32 %4
592}
; Intrinsic used by test_comiss above; takes two <4 x float> operands and
; returns the comparison result as an i32.
593declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
594
595define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
596; GENERIC-LABEL: test_cvtsi2ss:
597; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000598; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
599; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
600; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
601; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000602;
603; ATOM-LABEL: test_cvtsi2ss:
604; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000605; ATOM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:3.50]
606; ATOM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [6:3.00]
607; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
608; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000609;
610; SLM-LABEL: test_cvtsi2ss:
611; SLM: # BB#0:
612; SLM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00]
613; SLM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:0.50]
614; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
615; SLM-NEXT: retq # sched: [4:1.00]
616;
617; SANDY-LABEL: test_cvtsi2ss:
618; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000619; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
620; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000621; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000622; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000623;
624; HASWELL-LABEL: test_cvtsi2ss:
625; HASWELL: # BB#0:
626; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
627; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
628; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000629; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000630;
Gadi Haber85d99b42017-10-17 13:45:39 +0000631; BROADWELL-LABEL: test_cvtsi2ss:
632; BROADWELL: # BB#0:
633; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000634; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000635; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000636; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000637;
Gadi Haber767d98b2017-08-30 08:08:50 +0000638; SKYLAKE-LABEL: test_cvtsi2ss:
639; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000640; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
641; SKYLAKE-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
642; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000643; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000644;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000645; SKX-LABEL: test_cvtsi2ss:
646; SKX: # BB#0:
647; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
648; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000649; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
650; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000651;
Simon Pilgrim93986492017-04-18 19:04:40 +0000652; BTVER2-LABEL: test_cvtsi2ss:
653; BTVER2: # BB#0:
654; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
655; BTVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
656; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
657; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000658;
659; ZNVER1-LABEL: test_cvtsi2ss:
660; ZNVER1: # BB#0:
661; ZNVER1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
662; ZNVER1-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
663; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000664; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000665 %1 = sitofp i32 %a0 to float
666 %2 = load i32, i32 *%a1, align 4
667 %3 = sitofp i32 %2 to float
668 %4 = fadd float %1, %3
669 ret float %4
670}
671
672define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
673; GENERIC-LABEL: test_cvtsi2ssq:
674; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000675; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
676; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
677; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
678; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000679;
680; ATOM-LABEL: test_cvtsi2ssq:
681; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000682; ATOM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:3.50]
683; ATOM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:3.00]
684; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
685; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000686;
687; SLM-LABEL: test_cvtsi2ssq:
688; SLM: # BB#0:
689; SLM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00]
690; SLM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50]
691; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
692; SLM-NEXT: retq # sched: [4:1.00]
693;
694; SANDY-LABEL: test_cvtsi2ssq:
695; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000696; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
697; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000698; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000699; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000700;
701; HASWELL-LABEL: test_cvtsi2ssq:
702; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +0000703; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000704; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
705; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000706; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000707;
Gadi Haber85d99b42017-10-17 13:45:39 +0000708; BROADWELL-LABEL: test_cvtsi2ssq:
709; BROADWELL: # BB#0:
710; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000711; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000712; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000713; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000714;
Gadi Haber767d98b2017-08-30 08:08:50 +0000715; SKYLAKE-LABEL: test_cvtsi2ssq:
716; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000717; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
718; SKYLAKE-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
719; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000720; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000721;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000722; SKX-LABEL: test_cvtsi2ssq:
723; SKX: # BB#0:
724; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
725; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000726; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
727; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000728;
Simon Pilgrim93986492017-04-18 19:04:40 +0000729; BTVER2-LABEL: test_cvtsi2ssq:
730; BTVER2: # BB#0:
731; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
732; BTVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
733; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
734; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000735;
736; ZNVER1-LABEL: test_cvtsi2ssq:
737; ZNVER1: # BB#0:
738; ZNVER1-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
739; ZNVER1-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
740; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000741; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000742 %1 = sitofp i64 %a0 to float
743 %2 = load i64, i64 *%a1, align 8
744 %3 = sitofp i64 %2 to float
745 %4 = fadd float %1, %3
746 ret float %4
747}
748
749define i32 @test_cvtss2si(float %a0, float *%a1) {
750; GENERIC-LABEL: test_cvtss2si:
751; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000752; GENERIC-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
753; GENERIC-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
754; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
755; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000756;
757; ATOM-LABEL: test_cvtss2si:
758; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000759; ATOM-NEXT: cvtss2si (%rdi), %eax # sched: [9:4.50]
760; ATOM-NEXT: cvtss2si %xmm0, %ecx # sched: [8:4.00]
761; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
762; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000763;
764; SLM-LABEL: test_cvtss2si:
765; SLM: # BB#0:
766; SLM-NEXT: cvtss2si (%rdi), %eax # sched: [7:1.00]
767; SLM-NEXT: cvtss2si %xmm0, %ecx # sched: [4:0.50]
768; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
769; SLM-NEXT: retq # sched: [4:1.00]
770;
771; SANDY-LABEL: test_cvtss2si:
772; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000773; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
774; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000775; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000776; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000777;
778; HASWELL-LABEL: test_cvtss2si:
779; HASWELL: # BB#0:
780; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000781; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000782; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000783; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000784;
Gadi Haber85d99b42017-10-17 13:45:39 +0000785; BROADWELL-LABEL: test_cvtss2si:
786; BROADWELL: # BB#0:
787; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000788; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000789; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +0000790; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000791;
Gadi Haber767d98b2017-08-30 08:08:50 +0000792; SKYLAKE-LABEL: test_cvtss2si:
793; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000794; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000795; SKYLAKE-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000796; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000797; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000798;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000799; SKX-LABEL: test_cvtss2si:
800; SKX: # BB#0:
801; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000802; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000803; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +0000804; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000805;
Simon Pilgrim93986492017-04-18 19:04:40 +0000806; BTVER2-LABEL: test_cvtss2si:
807; BTVER2: # BB#0:
808; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
809; BTVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00]
810; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
811; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000812;
813; ZNVER1-LABEL: test_cvtss2si:
814; ZNVER1: # BB#0:
815; ZNVER1-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00]
816; ZNVER1-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
817; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000818; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000819 %1 = insertelement <4 x float> undef, float %a0, i32 0
820 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1)
821 %3 = load float, float *%a1, align 4
822 %4 = insertelement <4 x float> undef, float %3, i32 0
823 %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4)
824 %6 = add i32 %2, %5
825 ret i32 %6
826}
827declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
828
829define i64 @test_cvtss2siq(float %a0, float *%a1) {
830; GENERIC-LABEL: test_cvtss2siq:
831; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000832; GENERIC-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
833; GENERIC-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
834; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
835; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000836;
837; ATOM-LABEL: test_cvtss2siq:
838; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000839; ATOM-NEXT: cvtss2si (%rdi), %rax # sched: [10:5.00]
840; ATOM-NEXT: cvtss2si %xmm0, %rcx # sched: [9:4.50]
841; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
842; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000843;
844; SLM-LABEL: test_cvtss2siq:
845; SLM: # BB#0:
846; SLM-NEXT: cvtss2si (%rdi), %rax # sched: [7:1.00]
847; SLM-NEXT: cvtss2si %xmm0, %rcx # sched: [4:0.50]
848; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
849; SLM-NEXT: retq # sched: [4:1.00]
850;
851; SANDY-LABEL: test_cvtss2siq:
852; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000853; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
854; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000855; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000856; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000857;
858; HASWELL-LABEL: test_cvtss2siq:
859; HASWELL: # BB#0:
860; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000861; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000862; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000863; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000864;
Gadi Haber85d99b42017-10-17 13:45:39 +0000865; BROADWELL-LABEL: test_cvtss2siq:
866; BROADWELL: # BB#0:
867; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000868; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000869; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +0000870; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000871;
Gadi Haber767d98b2017-08-30 08:08:50 +0000872; SKYLAKE-LABEL: test_cvtss2siq:
873; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000874; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000875; SKYLAKE-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000876; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000877; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000878;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000879; SKX-LABEL: test_cvtss2siq:
880; SKX: # BB#0:
881; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000882; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000883; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +0000884; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000885;
Simon Pilgrim93986492017-04-18 19:04:40 +0000886; BTVER2-LABEL: test_cvtss2siq:
887; BTVER2: # BB#0:
888; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
889; BTVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00]
890; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
891; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000892;
893; ZNVER1-LABEL: test_cvtss2siq:
894; ZNVER1: # BB#0:
895; ZNVER1-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00]
896; ZNVER1-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
897; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000898; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000899 %1 = insertelement <4 x float> undef, float %a0, i32 0
900 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1)
901 %3 = load float, float *%a1, align 4
902 %4 = insertelement <4 x float> undef, float %3, i32 0
903 %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4)
904 %6 = add i64 %2, %5
905 ret i64 %6
906}
907declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
908
909define i32 @test_cvttss2si(float %a0, float *%a1) {
910; GENERIC-LABEL: test_cvttss2si:
911; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000912; GENERIC-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
913; GENERIC-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
914; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
915; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000916;
917; ATOM-LABEL: test_cvttss2si:
918; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000919; ATOM-NEXT: cvttss2si (%rdi), %eax # sched: [9:4.50]
920; ATOM-NEXT: cvttss2si %xmm0, %ecx # sched: [8:4.00]
921; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
922; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000923;
924; SLM-LABEL: test_cvttss2si:
925; SLM: # BB#0:
926; SLM-NEXT: cvttss2si (%rdi), %eax # sched: [7:1.00]
927; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50]
928; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
929; SLM-NEXT: retq # sched: [4:1.00]
930;
931; SANDY-LABEL: test_cvttss2si:
932; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +0000933; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
934; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000935; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +0000936; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000937;
938; HASWELL-LABEL: test_cvttss2si:
939; HASWELL: # BB#0:
940; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000941; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000942; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000943; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000944;
Gadi Haber85d99b42017-10-17 13:45:39 +0000945; BROADWELL-LABEL: test_cvttss2si:
946; BROADWELL: # BB#0:
947; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000948; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000949; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +0000950; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000951;
Gadi Haber767d98b2017-08-30 08:08:50 +0000952; SKYLAKE-LABEL: test_cvttss2si:
953; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000954; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000955; SKYLAKE-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000956; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000957; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000958;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000959; SKX-LABEL: test_cvttss2si:
960; SKX: # BB#0:
961; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000962; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000963; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +0000964; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000965;
Simon Pilgrim93986492017-04-18 19:04:40 +0000966; BTVER2-LABEL: test_cvttss2si:
967; BTVER2: # BB#0:
968; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
969; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
970; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
971; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +0000972;
973; ZNVER1-LABEL: test_cvttss2si:
974; ZNVER1: # BB#0:
975; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00]
976; ZNVER1-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
977; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000978; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +0000979 %1 = fptosi float %a0 to i32
980 %2 = load float, float *%a1, align 4
981 %3 = fptosi float %2 to i32
982 %4 = add i32 %1, %3
983 ret i32 %4
984}
985
986define i64 @test_cvttss2siq(float %a0, float *%a1) {
987; GENERIC-LABEL: test_cvttss2siq:
988; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000989; GENERIC-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
990; GENERIC-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
991; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
992; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +0000993;
994; ATOM-LABEL: test_cvttss2siq:
995; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +0000996; ATOM-NEXT: cvttss2si (%rdi), %rax # sched: [10:5.00]
997; ATOM-NEXT: cvttss2si %xmm0, %rcx # sched: [9:4.50]
998; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
999; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001000;
1001; SLM-LABEL: test_cvttss2siq:
1002; SLM: # BB#0:
1003; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00]
1004; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50]
1005; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
1006; SLM-NEXT: retq # sched: [4:1.00]
1007;
1008; SANDY-LABEL: test_cvttss2siq:
1009; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001010; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
1011; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001012; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001013; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001014;
1015; HASWELL-LABEL: test_cvttss2siq:
1016; HASWELL: # BB#0:
1017; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001018; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001019; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001020; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001021;
Gadi Haber85d99b42017-10-17 13:45:39 +00001022; BROADWELL-LABEL: test_cvttss2siq:
1023; BROADWELL: # BB#0:
1024; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001025; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001026; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +00001027; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001028;
Gadi Haber767d98b2017-08-30 08:08:50 +00001029; SKYLAKE-LABEL: test_cvttss2siq:
1030; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001031; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001032; SKYLAKE-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001033; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001034; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001035;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001036; SKX-LABEL: test_cvttss2siq:
1037; SKX: # BB#0:
1038; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001039; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001040; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00001041; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001042;
Simon Pilgrim93986492017-04-18 19:04:40 +00001043; BTVER2-LABEL: test_cvttss2siq:
1044; BTVER2: # BB#0:
1045; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
1046; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
1047; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
1048; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001049;
1050; ZNVER1-LABEL: test_cvttss2siq:
1051; ZNVER1: # BB#0:
1052; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00]
1053; ZNVER1-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
1054; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001055; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001056 %1 = fptosi float %a0 to i64
1057 %2 = load float, float *%a1, align 4
1058 %3 = fptosi float %2 to i64
1059 %4 = add i64 %1, %3
1060 ret i64 %4
1061}
1062
1063define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1064; GENERIC-LABEL: test_divps:
1065; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001066; GENERIC-NEXT: divps %xmm1, %xmm0 # sched: [14:1.00]
1067; GENERIC-NEXT: divps (%rdi), %xmm0 # sched: [20:1.00]
1068; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001069;
1070; ATOM-LABEL: test_divps:
1071; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001072; ATOM-NEXT: divps %xmm1, %xmm0 # sched: [70:35.00]
1073; ATOM-NEXT: divps (%rdi), %xmm0 # sched: [125:62.50]
1074; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001075;
1076; SLM-LABEL: test_divps:
1077; SLM: # BB#0:
1078; SLM-NEXT: divps %xmm1, %xmm0 # sched: [34:34.00]
1079; SLM-NEXT: divps (%rdi), %xmm0 # sched: [37:34.00]
1080; SLM-NEXT: retq # sched: [4:1.00]
1081;
1082; SANDY-LABEL: test_divps:
1083; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001084; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
1085; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
1086; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001087;
1088; HASWELL-LABEL: test_divps:
1089; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001090; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
1091; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
1092; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001093;
Gadi Haber85d99b42017-10-17 13:45:39 +00001094; BROADWELL-LABEL: test_divps:
1095; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00001096; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1097; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
1098; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001099;
Gadi Haber767d98b2017-08-30 08:08:50 +00001100; SKYLAKE-LABEL: test_divps:
1101; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001102; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001103; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
1104; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001105;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001106; SKX-LABEL: test_divps:
1107; SKX: # BB#0:
1108; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001109; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
1110; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001111;
Simon Pilgrim93986492017-04-18 19:04:40 +00001112; BTVER2-LABEL: test_divps:
1113; BTVER2: # BB#0:
1114; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
1115; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
1116; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001117;
1118; ZNVER1-LABEL: test_divps:
1119; ZNVER1: # BB#0:
1120; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
1121; ZNVER1-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001122; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001123 %1 = fdiv <4 x float> %a0, %a1
1124 %2 = load <4 x float>, <4 x float> *%a2, align 16
1125 %3 = fdiv <4 x float> %1, %2
1126 ret <4 x float> %3
1127}
1128
; Scheduling check for scalar single-precision divide.
; The IR divides %a0 by %a1 and then divides that quotient by a float loaded
; from %a2, so every CPU prefix below expects one register-register (v)divss
; followed by one (v)divss with a (%rdi) memory operand, each carrying the
; "# sched: [...]" annotation emitted under -print-schedule.
1129define float @test_divss(float %a0, float %a1, float *%a2) {
1130; GENERIC-LABEL: test_divss:
1131; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001132; GENERIC-NEXT: divss %xmm1, %xmm0 # sched: [14:1.00]
1133; GENERIC-NEXT: divss (%rdi), %xmm0 # sched: [20:1.00]
1134; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001135;
1136; ATOM-LABEL: test_divss:
1137; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001138; ATOM-NEXT: divss %xmm1, %xmm0 # sched: [34:17.00]
1139; ATOM-NEXT: divss (%rdi), %xmm0 # sched: [62:31.00]
1140; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001141;
1142; SLM-LABEL: test_divss:
1143; SLM: # BB#0:
1144; SLM-NEXT: divss %xmm1, %xmm0 # sched: [34:34.00]
1145; SLM-NEXT: divss (%rdi), %xmm0 # sched: [37:34.00]
1146; SLM-NEXT: retq # sched: [4:1.00]
1147;
1148; SANDY-LABEL: test_divss:
1149; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001150; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
1151; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
1152; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001153;
1154; HASWELL-LABEL: test_divss:
1155; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001156; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
1157; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
1158; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001159;
Gadi Haber85d99b42017-10-17 13:45:39 +00001160; BROADWELL-LABEL: test_divss:
1161; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00001162; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1163; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
1164; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001165;
Gadi Haber767d98b2017-08-30 08:08:50 +00001166; SKYLAKE-LABEL: test_divss:
1167; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001168; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001169; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
1170; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001171;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001172; SKX-LABEL: test_divss:
1173; SKX: # BB#0:
1174; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001175; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
1176; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001177;
Simon Pilgrim93986492017-04-18 19:04:40 +00001178; BTVER2-LABEL: test_divss:
1179; BTVER2: # BB#0:
1180; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
1181; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
1182; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001183;
1184; ZNVER1-LABEL: test_divss:
1185; ZNVER1: # BB#0:
1186; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
1187; ZNVER1-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001188; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001189 %1 = fdiv float %a0, %a1
1190 %2 = load float, float *%a2, align 4
1191 %3 = fdiv float %1, %2
1192 ret float %3
1193}
1194
; Scheduling check for (v)ldmxcsr.
; The IR stores %a0 into a stack slot and hands that slot's address (as i8*)
; to @llvm.x86.sse.ldmxcsr, so every CPU prefix below expects a movl of %edi
; to a negative %rsp offset followed by (v)ldmxcsr reading the same slot.
; The stack offset is matched with the {{[0-9]+}} regex rather than a fixed
; number. ZNVER1 is the only prefix whose (v)ldmxcsr annotation prints "?"
; in the second sched field.
1195define void @test_ldmxcsr(i32 %a0) {
1196; GENERIC-LABEL: test_ldmxcsr:
1197; GENERIC: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00001198; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
Simon Pilgrim84846982017-08-01 15:14:35 +00001199; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1200; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001201;
1202; ATOM-LABEL: test_ldmxcsr:
1203; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001204; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1205; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:2.50]
1206; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001207;
1208; SLM-LABEL: test_ldmxcsr:
1209; SLM: # BB#0:
1210; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1211; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
1212; SLM-NEXT: retq # sched: [4:1.00]
1213;
1214; SANDY-LABEL: test_ldmxcsr:
1215; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00001216; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001217; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1218; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001219;
1220; HASWELL-LABEL: test_ldmxcsr:
1221; HASWELL: # BB#0:
1222; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001223; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
1224; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001225;
Gadi Haber85d99b42017-10-17 13:45:39 +00001226; BROADWELL-LABEL: test_ldmxcsr:
1227; BROADWELL: # BB#0:
1228; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001229; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
1230; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001231;
Gadi Haber767d98b2017-08-30 08:08:50 +00001232; SKYLAKE-LABEL: test_ldmxcsr:
1233; SKYLAKE: # BB#0:
1234; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001235; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
1236; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001237;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001238; SKX-LABEL: test_ldmxcsr:
1239; SKX: # BB#0:
1240; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001241; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
1242; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001243;
Simon Pilgrim93986492017-04-18 19:04:40 +00001244; BTVER2-LABEL: test_ldmxcsr:
1245; BTVER2: # BB#0:
1246; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1247; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1248; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001249;
1250; ZNVER1-LABEL: test_ldmxcsr:
1251; ZNVER1: # BB#0:
1252; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001253; ZNVER1-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
1254; ZNVER1-NEXT: retq # sched: [1:0.50]
; The intrinsic takes a pointer, so the i32 argument is first spilled to an
; alloca and the alloca is bitcast to i8* for the call.
Simon Pilgrim93986492017-04-18 19:04:40 +00001255 %1 = alloca i32, align 4
1256 %2 = bitcast i32* %1 to i8*
1257 store i32 %a0, i32* %1
1258 call void @llvm.x86.sse.ldmxcsr(i8* %2)
1259 ret void
1260}
; Declared without readnone: the caller stores a value and passes its address
; for the intrinsic to read, and loading MXCSR changes FP control state, so
; "readnone" would misdescribe the call. Only nounwind is kept.
1261declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
1262
; Scheduling check for packed single-precision maximum.
; @llvm.x86.sse.max.ps is called once on the two register arguments and once
; on that result and a <4 x float> loaded (align 16) from %a2, so every CPU
; prefix below expects a register-register (v)maxps followed by a (v)maxps
; with a (%rdi) memory operand, each with its "# sched: [...]" annotation.
1263define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1264; GENERIC-LABEL: test_maxps:
1265; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001266; GENERIC-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
1267; GENERIC-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
1268; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001269;
1270; ATOM-LABEL: test_maxps:
1271; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001272; ATOM-NEXT: maxps %xmm1, %xmm0 # sched: [5:5.00]
1273; ATOM-NEXT: maxps (%rdi), %xmm0 # sched: [5:5.00]
1274; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001275;
1276; SLM-LABEL: test_maxps:
1277; SLM: # BB#0:
1278; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
1279; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00]
1280; SLM-NEXT: retq # sched: [4:1.00]
1281;
1282; SANDY-LABEL: test_maxps:
1283; SANDY: # BB#0:
1284; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001285; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1286; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001287;
1288; HASWELL-LABEL: test_maxps:
1289; HASWELL: # BB#0:
1290; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001291; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1292; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001293;
Gadi Haber85d99b42017-10-17 13:45:39 +00001294; BROADWELL-LABEL: test_maxps:
1295; BROADWELL: # BB#0:
1296; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001297; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1298; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001299;
Gadi Haber767d98b2017-08-30 08:08:50 +00001300; SKYLAKE-LABEL: test_maxps:
1301; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001302; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001303; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
1304; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001305;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001306; SKX-LABEL: test_maxps:
1307; SKX: # BB#0:
1308; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1309; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001310; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001311;
Simon Pilgrim93986492017-04-18 19:04:40 +00001312; BTVER2-LABEL: test_maxps:
1313; BTVER2: # BB#0:
1314; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1315; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1316; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001317;
1318; ZNVER1-LABEL: test_maxps:
1319; ZNVER1: # BB#0:
1320; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1321; ZNVER1-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001322; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001323 %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
1324 %2 = load <4 x float>, <4 x float> *%a2, align 16
1325 %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2)
1326 ret <4 x float> %3
1327}
1328declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
1329
; Scheduling check for scalar single-precision maximum.
; @llvm.x86.sse.max.ss is called once on the two register arguments and once
; on that result and a <4 x float> loaded (align 16) from %a2, so every CPU
; prefix below expects a register-register (v)maxss followed by a (v)maxss
; with a (%rdi) memory operand, each with its "# sched: [...]" annotation.
1330define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1331; GENERIC-LABEL: test_maxss:
1332; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001333; GENERIC-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
1334; GENERIC-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
1335; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001336;
1337; ATOM-LABEL: test_maxss:
1338; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001339; ATOM-NEXT: maxss %xmm1, %xmm0 # sched: [5:5.00]
1340; ATOM-NEXT: maxss (%rdi), %xmm0 # sched: [5:5.00]
1341; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001342;
1343; SLM-LABEL: test_maxss:
1344; SLM: # BB#0:
1345; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
1346; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00]
1347; SLM-NEXT: retq # sched: [4:1.00]
1348;
1349; SANDY-LABEL: test_maxss:
1350; SANDY: # BB#0:
1351; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001352; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1353; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001354;
1355; HASWELL-LABEL: test_maxss:
1356; HASWELL: # BB#0:
1357; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001358; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1359; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001360;
Gadi Haber85d99b42017-10-17 13:45:39 +00001361; BROADWELL-LABEL: test_maxss:
1362; BROADWELL: # BB#0:
1363; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001364; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1365; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001366;
Gadi Haber767d98b2017-08-30 08:08:50 +00001367; SKYLAKE-LABEL: test_maxss:
1368; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001369; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001370; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1371; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001372;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001373; SKX-LABEL: test_maxss:
1374; SKX: # BB#0:
1375; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00001376; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1377; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001378;
Simon Pilgrim93986492017-04-18 19:04:40 +00001379; BTVER2-LABEL: test_maxss:
1380; BTVER2: # BB#0:
1381; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1382; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1383; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001384;
1385; ZNVER1-LABEL: test_maxss:
1386; ZNVER1: # BB#0:
1387; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1388; ZNVER1-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001389; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001390 %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
1391 %2 = load <4 x float>, <4 x float> *%a2, align 16
1392 %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
1393 ret <4 x float> %3
1394}
1395declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
1396
; Scheduling check for packed single-precision minimum.
; @llvm.x86.sse.min.ps is called once on the two register arguments and once
; on that result and a <4 x float> loaded (align 16) from %a2, so every CPU
; prefix below expects a register-register (v)minps followed by a (v)minps
; with a (%rdi) memory operand, each with its "# sched: [...]" annotation.
1397define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1398; GENERIC-LABEL: test_minps:
1399; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001400; GENERIC-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
1401; GENERIC-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
1402; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001403;
1404; ATOM-LABEL: test_minps:
1405; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001406; ATOM-NEXT: minps %xmm1, %xmm0 # sched: [5:5.00]
1407; ATOM-NEXT: minps (%rdi), %xmm0 # sched: [5:5.00]
1408; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001409;
1410; SLM-LABEL: test_minps:
1411; SLM: # BB#0:
1412; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
1413; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00]
1414; SLM-NEXT: retq # sched: [4:1.00]
1415;
1416; SANDY-LABEL: test_minps:
1417; SANDY: # BB#0:
1418; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001419; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1420; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001421;
1422; HASWELL-LABEL: test_minps:
1423; HASWELL: # BB#0:
1424; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001425; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1426; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001427;
Gadi Haber85d99b42017-10-17 13:45:39 +00001428; BROADWELL-LABEL: test_minps:
1429; BROADWELL: # BB#0:
1430; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001431; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1432; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001433;
Gadi Haber767d98b2017-08-30 08:08:50 +00001434; SKYLAKE-LABEL: test_minps:
1435; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001436; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001437; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
1438; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001439;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001440; SKX-LABEL: test_minps:
1441; SKX: # BB#0:
1442; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1443; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001444; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001445;
Simon Pilgrim93986492017-04-18 19:04:40 +00001446; BTVER2-LABEL: test_minps:
1447; BTVER2: # BB#0:
1448; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1449; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1450; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001451;
1452; ZNVER1-LABEL: test_minps:
1453; ZNVER1: # BB#0:
1454; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1455; ZNVER1-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001456; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001457 %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1458 %2 = load <4 x float>, <4 x float> *%a2, align 16
1459 %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2)
1460 ret <4 x float> %3
1461}
1462declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
1463
; Scheduling check for scalar single-precision minimum.
; @llvm.x86.sse.min.ss is called once on the two register arguments and once
; on that result and a <4 x float> loaded (align 16) from %a2, so every CPU
; prefix below expects a register-register (v)minss followed by a (v)minss
; with a (%rdi) memory operand, each with its "# sched: [...]" annotation.
1464define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1465; GENERIC-LABEL: test_minss:
1466; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001467; GENERIC-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
1468; GENERIC-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
1469; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001470;
1471; ATOM-LABEL: test_minss:
1472; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001473; ATOM-NEXT: minss %xmm1, %xmm0 # sched: [5:5.00]
1474; ATOM-NEXT: minss (%rdi), %xmm0 # sched: [5:5.00]
1475; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001476;
1477; SLM-LABEL: test_minss:
1478; SLM: # BB#0:
1479; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
1480; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00]
1481; SLM-NEXT: retq # sched: [4:1.00]
1482;
1483; SANDY-LABEL: test_minss:
1484; SANDY: # BB#0:
1485; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001486; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1487; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001488;
1489; HASWELL-LABEL: test_minss:
1490; HASWELL: # BB#0:
1491; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001492; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1493; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001494;
Gadi Haber85d99b42017-10-17 13:45:39 +00001495; BROADWELL-LABEL: test_minss:
1496; BROADWELL: # BB#0:
1497; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001498; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1499; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001500;
Gadi Haber767d98b2017-08-30 08:08:50 +00001501; SKYLAKE-LABEL: test_minss:
1502; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001503; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001504; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1505; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001506;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001507; SKX-LABEL: test_minss:
1508; SKX: # BB#0:
1509; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00001510; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1511; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001512;
Simon Pilgrim93986492017-04-18 19:04:40 +00001513; BTVER2-LABEL: test_minss:
1514; BTVER2: # BB#0:
1515; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1516; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1517; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001518;
1519; ZNVER1-LABEL: test_minss:
1520; ZNVER1: # BB#0:
1521; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1522; ZNVER1-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001523; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001524 %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
1525 %2 = load <4 x float>, <4 x float> *%a2, align 16
1526 %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
1527 ret <4 x float> %3
1528}
1529declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
1530
; Scheduling check for aligned packed-single moves.
; The IR loads a <4 x float> from %a0 (align 16), adds it to itself and stores
; the sum to %a1 (align 16), so every CPU prefix below expects a (v)movaps
; load from (%rdi), a (v)addps, and a (v)movaps store to (%rsi).
; NOTE(review): the fadd between load and store presumably exists to keep the
; value in the float domain so (v)movaps is selected for both moves - confirm.
1531define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
1532; GENERIC-LABEL: test_movaps:
1533; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001534; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
1535; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1536; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00]
1537; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001538;
1539; ATOM-LABEL: test_movaps:
1540; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001541; ATOM-NEXT: movaps (%rdi), %xmm0 # sched: [1:1.00]
1542; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
1543; ATOM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1544; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001545;
1546; SLM-LABEL: test_movaps:
1547; SLM: # BB#0:
1548; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00]
1549; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1550; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1551; SLM-NEXT: retq # sched: [4:1.00]
1552;
1553; SANDY-LABEL: test_movaps:
1554; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001555; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001556; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001557; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00]
1558; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001559;
1560; HASWELL-LABEL: test_movaps:
1561; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001562; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001563; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001564; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001565; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001566;
Gadi Haber85d99b42017-10-17 13:45:39 +00001567; BROADWELL-LABEL: test_movaps:
1568; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00001569; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001570; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1571; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001572; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001573;
Gadi Haber767d98b2017-08-30 08:08:50 +00001574; SKYLAKE-LABEL: test_movaps:
1575; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001576; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001577; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001578; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001579; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001580;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001581; SKX-LABEL: test_movaps:
1582; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001583; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
1584; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001585; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001586; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001587;
Simon Pilgrim93986492017-04-18 19:04:40 +00001588; BTVER2-LABEL: test_movaps:
1589; BTVER2: # BB#0:
1590; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
1591; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1592; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1593; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001594;
1595; ZNVER1-LABEL: test_movaps:
1596; ZNVER1: # BB#0:
1597; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50]
1598; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1599; ZNVER1-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001600; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001601 %1 = load <4 x float>, <4 x float> *%a0, align 16
1602 %2 = fadd <4 x float> %1, %1
1603 store <4 x float> %2, <4 x float> *%a1, align 16
1604 ret void
1605}
1606
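1607; TODO (v)movhlps - the AVX prefixes below (SANDY through ZNVER1) select vunpckhpd for this shuffle, so only the non-VEX movhlps form is actually checked.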
1608
; Scheduling check for the high-to-low packed-single shuffle.
; The shufflevector mask <6, 7, 2, 3> places lanes 2-3 of %a1 in the low half
; of the result and keeps lanes 2-3 of %a0 in the high half. GENERIC, ATOM and
; SLM expect movhlps for it; the remaining prefixes expect vunpckhpd. ATOM
; additionally expects six nops between the shuffle and retq.
1609define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
1610; GENERIC-LABEL: test_movhlps:
1611; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001612; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1613; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001614;
1615; ATOM-LABEL: test_movhlps:
1616; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001617; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1618; ATOM-NEXT: nop # sched: [1:0.50]
1619; ATOM-NEXT: nop # sched: [1:0.50]
1620; ATOM-NEXT: nop # sched: [1:0.50]
1621; ATOM-NEXT: nop # sched: [1:0.50]
1622; ATOM-NEXT: nop # sched: [1:0.50]
1623; ATOM-NEXT: nop # sched: [1:0.50]
1624; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001625;
1626; SLM-LABEL: test_movhlps:
1627; SLM: # BB#0:
1628; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1629; SLM-NEXT: retq # sched: [4:1.00]
1630;
1631; SANDY-LABEL: test_movhlps:
1632; SANDY: # BB#0:
1633; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001634; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001635;
1636; HASWELL-LABEL: test_movhlps:
1637; HASWELL: # BB#0:
1638; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001639; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001640;
Gadi Haber85d99b42017-10-17 13:45:39 +00001641; BROADWELL-LABEL: test_movhlps:
1642; BROADWELL: # BB#0:
1643; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001644; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001645;
Gadi Haber767d98b2017-08-30 08:08:50 +00001646; SKYLAKE-LABEL: test_movhlps:
1647; SKYLAKE: # BB#0:
1648; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001649; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001650;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001651; SKX-LABEL: test_movhlps:
1652; SKX: # BB#0:
1653; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001654; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001655;
Simon Pilgrim93986492017-04-18 19:04:40 +00001656; BTVER2-LABEL: test_movhlps:
1657; BTVER2: # BB#0:
1658; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
1659; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001660;
1661; ZNVER1-LABEL: test_movhlps:
1662; ZNVER1: # BB#0:
1663; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001664; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001665 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1666 ret <4 x float> %1
1667}
1668
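1669; TODO (v)movhps - every prefix below loads the high half with (v)movhpd and none emits a (v)movhps instruction, so (v)movhps itself is not yet checked.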
1670
; Scheduling check for loading and storing the high 64 bits of an XMM value.
; The IR loads a <2 x float> through %a2, inserts it into the upper half of
; %a1, adds %a0, and stores the upper half of the sum back through the same
; pointer. Every CPU prefix below expects the load as (v)movhpd; the store of
; the upper half is expected as movhlps + movlps on GENERIC and ATOM and as
; (v)pextrq $1 on the remaining prefixes.
1671define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1672; GENERIC-LABEL: test_movhps:
1673; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001674; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
1675; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1676; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
1677; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
1678; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001679;
1680; ATOM-LABEL: test_movhps:
1681; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001682; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
1683; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1684; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
1685; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1686; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001687;
1688; SLM-LABEL: test_movhps:
1689; SLM: # BB#0:
1690; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
1691; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1692; SLM-NEXT: pextrq $1, %xmm1, (%rdi) # sched: [4:2.00]
1693; SLM-NEXT: retq # sched: [4:1.00]
1694;
1695; SANDY-LABEL: test_movhps:
1696; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001697; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001698; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1699; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001700; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001701;
1702; HASWELL-LABEL: test_movhps:
1703; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001704; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001705; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001706; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
1707; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001708;
Gadi Haber85d99b42017-10-17 13:45:39 +00001709; BROADWELL-LABEL: test_movhps:
1710; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00001711; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001712; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001713; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1714; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001715;
Gadi Haber767d98b2017-08-30 08:08:50 +00001716; SKYLAKE-LABEL: test_movhps:
1717; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001718; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001719; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001720; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1721; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001722;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001723; SKX-LABEL: test_movhps:
1724; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001725; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1726; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1727; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1728; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001729;
Simon Pilgrim93986492017-04-18 19:04:40 +00001730; BTVER2-LABEL: test_movhps:
1731; BTVER2: # BB#0:
1732; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1733; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1734; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
1735; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001736;
1737; ZNVER1-LABEL: test_movhps:
1738; ZNVER1: # BB#0:
1739; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
1740; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1741; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [8:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001742; ZNVER1-NEXT: retq # sched: [1:0.50]
; %3 widens the loaded pair to four lanes (lanes 2-3 come from undef), %4 puts
; that pair into lanes 2-3 of %a1 via mask <0, 1, 4, 5>, and %6 extracts lanes
; 2-3 of the sum for the store.
Simon Pilgrim93986492017-04-18 19:04:40 +00001743 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1744 %2 = load <2 x float>, <2 x float> *%1, align 8
1745 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1746 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1747 %5 = fadd <4 x float> %a0, %4
1748 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1749 store <2 x float> %6, <2 x float>* %1
1750 ret void
1751}
1752
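1753; NOTE: formerly "TODO (v)movlhps" - the assertions of the test below now expect movlhps/vmovlhps on every tested CPU.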
1754
; Scheduling test for (v)movlhps.
; The IR concatenates the low two lanes of %a0 with the low two lanes of %a1
; (shuffle mask 0,1,4,5) and then adds %a1 to the shuffled vector.
; For every -mcpu run the assertions expect a (v)movlhps followed by a
; (v)addps and a retq, each carrying the "sched: [x:y]" annotation printed by
; -print-schedule (presumably latency and reciprocal throughput - confirm).
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1755define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
1756; GENERIC-LABEL: test_movlhps:
1757; GENERIC: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001758; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim84846982017-08-01 15:14:35 +00001759; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1760; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001761;
1762; ATOM-LABEL: test_movlhps:
1763; ATOM: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001764; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001765; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
1766; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001767;
1768; SLM-LABEL: test_movlhps:
1769; SLM: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001770; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001771; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1772; SLM-NEXT: retq # sched: [4:1.00]
1773;
1774; SANDY-LABEL: test_movlhps:
1775; SANDY: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001776; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001777; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001778; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001779;
1780; HASWELL-LABEL: test_movlhps:
1781; HASWELL: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001782; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001783; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001784; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001785;
Gadi Haber85d99b42017-10-17 13:45:39 +00001786; BROADWELL-LABEL: test_movlhps:
1787; BROADWELL: # BB#0:
1788; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1789; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001790; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001791;
Gadi Haber767d98b2017-08-30 08:08:50 +00001792; SKYLAKE-LABEL: test_movlhps:
1793; SKYLAKE: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001794; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001795; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001796; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001797;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001798; SKX-LABEL: test_movlhps:
1799; SKX: # BB#0:
1800; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001801; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
1802; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001803;
Simon Pilgrim93986492017-04-18 19:04:40 +00001804; BTVER2-LABEL: test_movlhps:
1805; BTVER2: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001806; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001807; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1808; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001809;
1810; ZNVER1-LABEL: test_movlhps:
1811; ZNVER1: # BB#0:
Craig Toppera6054322017-09-18 04:40:58 +00001812; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
Craig Topper106b5b62017-07-19 02:45:14 +00001813; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001814; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001815 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1816 %2 = fadd <4 x float> %a1, %1
1817 ret <4 x float> %2
1818}
1819
; Scheduling test for the 64-bit low-half load/store forms ((v)movlps).
; The IR loads a <2 x float> through %a2 (bitcast from x86_mmx*), widens it to
; four lanes, replaces the low two lanes of %a1 with the loaded pair (shuffle
; mask 4,5,2,3), adds %a0, and stores the low two lanes of the sum back through
; the same pointer.
; Note that the assertions below match the load as (v)movlpd on every CPU;
; only the store is matched as (v)movlps.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1820define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1821; GENERIC-LABEL: test_movlps:
1822; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001823; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
1824; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1825; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
1826; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001827;
1828; ATOM-LABEL: test_movlps:
1829; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001830; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
1831; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1832; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1833; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001834;
1835; SLM-LABEL: test_movlps:
1836; SLM: # BB#0:
1837; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
1838; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1839; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1840; SLM-NEXT: retq # sched: [4:1.00]
1841;
1842; SANDY-LABEL: test_movlps:
1843; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001844; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001845; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00001846; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00]
1847; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001848;
1849; HASWELL-LABEL: test_movlps:
1850; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00001851; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001852; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00001853; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001854; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001855;
Gadi Haber85d99b42017-10-17 13:45:39 +00001856; BROADWELL-LABEL: test_movlps:
1857; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00001858; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001859; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1860; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001861; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001862;
Gadi Haber767d98b2017-08-30 08:08:50 +00001863; SKYLAKE-LABEL: test_movlps:
1864; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001865; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001866; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00001867; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001868; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001869;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001870; SKX-LABEL: test_movlps:
1871; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001872; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1873; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001874; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001875; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001876;
Simon Pilgrim93986492017-04-18 19:04:40 +00001877; BTVER2-LABEL: test_movlps:
1878; BTVER2: # BB#0:
1879; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1880; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1881; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1882; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001883;
1884; ZNVER1-LABEL: test_movlps:
1885; ZNVER1: # BB#0:
1886; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
1887; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1888; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001889; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001890 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1891 %2 = load <2 x float>, <2 x float> *%1, align 8
1892 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1893 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1894 %5 = fadd <4 x float> %a0, %4
1895 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1896 store <2 x float> %6, <2 x float>* %1
1897 ret void
1898}
1899
; Scheduling test for (v)movmskps.
; The IR calls the @llvm.x86.sse.movmsk.ps intrinsic on %a0 and returns its
; i32 result; the intrinsic is declared directly after the function.
; Every -mcpu run is expected to emit a single (v)movmskps %xmm0, %eax before
; the retq; the atom run additionally expects two nops between them.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1900define i32 @test_movmskps(<4 x float> %a0) {
1901; GENERIC-LABEL: test_movmskps:
1902; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001903; GENERIC-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
1904; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001905;
1906; ATOM-LABEL: test_movmskps:
1907; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001908; ATOM-NEXT: movmskps %xmm0, %eax # sched: [3:3.00]
1909; ATOM-NEXT: nop # sched: [1:0.50]
1910; ATOM-NEXT: nop # sched: [1:0.50]
1911; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001912;
1913; SLM-LABEL: test_movmskps:
1914; SLM: # BB#0:
1915; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50]
1916; SLM-NEXT: retq # sched: [4:1.00]
1917;
1918; SANDY-LABEL: test_movmskps:
1919; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001920; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
1921; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001922;
1923; HASWELL-LABEL: test_movmskps:
1924; HASWELL: # BB#0:
1925; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001926; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001927;
Gadi Haber85d99b42017-10-17 13:45:39 +00001928; BROADWELL-LABEL: test_movmskps:
1929; BROADWELL: # BB#0:
1930; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001931; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001932;
Gadi Haber767d98b2017-08-30 08:08:50 +00001933; SKYLAKE-LABEL: test_movmskps:
1934; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001935; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001936; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001937;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001938; SKX-LABEL: test_movmskps:
1939; SKX: # BB#0:
1940; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001941; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001942;
Simon Pilgrim93986492017-04-18 19:04:40 +00001943; BTVER2-LABEL: test_movmskps:
1944; BTVER2: # BB#0:
Andrew V. Tischenko3c8bf5e2017-11-02 10:33:41 +00001945; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001946; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00001947;
1948; ZNVER1-LABEL: test_movmskps:
1949; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001950; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:1.00]
1951; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001952 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
1953 ret i32 %1
1954}
1955declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
1956
; Scheduling test for (v)movntps.
; The IR stores %a0 through %a1 with 16-byte alignment and the !nontemporal
; metadata node !0 (defined outside this part of the file).
; Every -mcpu run is expected to emit a single (v)movntps %xmm0, (%rdi) before
; the retq; the atom run additionally expects six nops between them.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1957define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
1958; GENERIC-LABEL: test_movntps:
1959; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001960; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00]
1961; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001962;
1963; ATOM-LABEL: test_movntps:
1964; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00001965; ATOM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
1966; ATOM-NEXT: nop # sched: [1:0.50]
1967; ATOM-NEXT: nop # sched: [1:0.50]
1968; ATOM-NEXT: nop # sched: [1:0.50]
1969; ATOM-NEXT: nop # sched: [1:0.50]
1970; ATOM-NEXT: nop # sched: [1:0.50]
1971; ATOM-NEXT: nop # sched: [1:0.50]
1972; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00001973;
1974; SLM-LABEL: test_movntps:
1975; SLM: # BB#0:
1976; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
1977; SLM-NEXT: retq # sched: [4:1.00]
1978;
1979; SANDY-LABEL: test_movntps:
1980; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00001981; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00]
1982; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001983;
1984; HASWELL-LABEL: test_movntps:
1985; HASWELL: # BB#0:
Michael Zuckermanf6684002017-06-28 11:23:31 +00001986; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001987; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00001988;
Gadi Haber85d99b42017-10-17 13:45:39 +00001989; BROADWELL-LABEL: test_movntps:
1990; BROADWELL: # BB#0:
1991; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001992; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001993;
Gadi Haber767d98b2017-08-30 08:08:50 +00001994; SKYLAKE-LABEL: test_movntps:
1995; SKYLAKE: # BB#0:
1996; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001997; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001998;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001999; SKX-LABEL: test_movntps:
2000; SKX: # BB#0:
2001; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002002; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002003;
Simon Pilgrim93986492017-04-18 19:04:40 +00002004; BTVER2-LABEL: test_movntps:
2005; BTVER2: # BB#0:
2006; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
2007; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002008;
2009; ZNVER1-LABEL: test_movntps:
2010; ZNVER1: # BB#0:
2011; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002012; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002013 store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0
2014 ret void
2015}
2016
; Scheduling test for the memory forms of (v)movss.
; The IR loads a float through %a0 (align 1), adds it to itself, and stores the
; sum through %a1 (align 1).
; Every -mcpu run is expected to emit a (v)movss load, a (v)addss of the
; register with itself, a (v)movss store to (%rsi), and a retq.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2017define void @test_movss_mem(float* %a0, float* %a1) {
2018; GENERIC-LABEL: test_movss_mem:
2019; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002020; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
2021; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
2022; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00]
2023; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002024;
2025; ATOM-LABEL: test_movss_mem:
2026; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002027; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00]
2028; ATOM-NEXT: addss %xmm0, %xmm0 # sched: [5:5.00]
2029; ATOM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
2030; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002031;
2032; SLM-LABEL: test_movss_mem:
2033; SLM: # BB#0:
2034; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
2035; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
2036; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
2037; SLM-NEXT: retq # sched: [4:1.00]
2038;
2039; SANDY-LABEL: test_movss_mem:
2040; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002041; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002042; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002043; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00]
2044; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002045;
2046; HASWELL-LABEL: test_movss_mem:
2047; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002048; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002049; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002050; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002051; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002052;
Gadi Haber85d99b42017-10-17 13:45:39 +00002053; BROADWELL-LABEL: test_movss_mem:
2054; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002055; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00002056; BROADWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2057; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002058; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002059;
Gadi Haber767d98b2017-08-30 08:08:50 +00002060; SKYLAKE-LABEL: test_movss_mem:
2061; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002062; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002063; SKYLAKE-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002064; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002065; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002066;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002067; SKX-LABEL: test_movss_mem:
2068; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002069; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
2070; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002071; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002072; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002073;
Simon Pilgrim93986492017-04-18 19:04:40 +00002074; BTVER2-LABEL: test_movss_mem:
2075; BTVER2: # BB#0:
2076; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
2077; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2078; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
2079; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002080;
2081; ZNVER1-LABEL: test_movss_mem:
2082; ZNVER1: # BB#0:
2083; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
2084; ZNVER1-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2085; ZNVER1-NEXT: vmovss %xmm0, (%rsi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002086; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002087 %1 = load float, float* %a0, align 1
2088 %2 = fadd float %1, %1
2089 store float %2, float *%a1, align 1
2090 ret void
2091}
2092
; Scheduling test for the register form of (v)movss.
; The IR builds a vector whose lane 0 comes from %a1 and whose lanes 1-3 come
; from %a0 (shuffle mask 4,1,2,3) and returns it.
; The instruction expected for this pattern differs per -mcpu run: movss for
; x86-64 and atom (atom also expects six nops before the retq), blendps for
; slm, vmovss for skx, and vblendps for the remaining AVX-capable runs.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2093define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
2094; GENERIC-LABEL: test_movss_reg:
2095; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002096; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
2097; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002098;
2099; ATOM-LABEL: test_movss_reg:
2100; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002101; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
2102; ATOM-NEXT: nop # sched: [1:0.50]
2103; ATOM-NEXT: nop # sched: [1:0.50]
2104; ATOM-NEXT: nop # sched: [1:0.50]
2105; ATOM-NEXT: nop # sched: [1:0.50]
2106; ATOM-NEXT: nop # sched: [1:0.50]
2107; ATOM-NEXT: nop # sched: [1:0.50]
2108; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002109;
2110; SLM-LABEL: test_movss_reg:
2111; SLM: # BB#0:
2112; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
2113; SLM-NEXT: retq # sched: [4:1.00]
2114;
2115; SANDY-LABEL: test_movss_reg:
2116; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00002117; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002118; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002119;
2120; HASWELL-LABEL: test_movss_reg:
2121; HASWELL: # BB#0:
2122; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002123; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002124;
Gadi Haber85d99b42017-10-17 13:45:39 +00002125; BROADWELL-LABEL: test_movss_reg:
2126; BROADWELL: # BB#0:
2127; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00002128; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002129;
Gadi Haber767d98b2017-08-30 08:08:50 +00002130; SKYLAKE-LABEL: test_movss_reg:
2131; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002132; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
2133; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002134;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002135; SKX-LABEL: test_movss_reg:
2136; SKX: # BB#0:
2137; SKX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002138; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002139;
Simon Pilgrim93986492017-04-18 19:04:40 +00002140; BTVER2-LABEL: test_movss_reg:
2141; BTVER2: # BB#0:
2142; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
2143; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002144;
2145; ZNVER1-LABEL: test_movss_reg:
2146; ZNVER1: # BB#0:
2147; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002148; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002149 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
2150 ret <4 x float> %1
2151}
2152
; Scheduling test for (v)movups.
; The IR loads a <4 x float> through %a0 with align 1, adds it to itself, and
; stores the sum through %a1 with align 1.
; Every -mcpu run is expected to emit a (v)movups load from (%rdi), a (v)addps
; of the register with itself, a (v)movups store to (%rsi), and a retq.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2153define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
2154; GENERIC-LABEL: test_movups:
2155; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002156; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
2157; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
2158; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00]
2159; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002160;
2161; ATOM-LABEL: test_movups:
2162; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002163; ATOM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.50]
2164; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
2165; ATOM-NEXT: movups %xmm0, (%rsi) # sched: [2:1.00]
2166; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002167;
2168; SLM-LABEL: test_movups:
2169; SLM: # BB#0:
2170; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00]
2171; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
2172; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
2173; SLM-NEXT: retq # sched: [4:1.00]
2174;
2175; SANDY-LABEL: test_movups:
2176; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002177; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002178; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002179; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
2180; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002181;
2182; HASWELL-LABEL: test_movups:
2183; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002184; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002185; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00002186; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002187; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002188;
Gadi Haber85d99b42017-10-17 13:45:39 +00002189; BROADWELL-LABEL: test_movups:
2190; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002191; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00002192; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2193; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002194; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002195;
Gadi Haber767d98b2017-08-30 08:08:50 +00002196; SKYLAKE-LABEL: test_movups:
2197; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002198; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002199; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00002200; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002201; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002202;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002203; SKX-LABEL: test_movups:
2204; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002205; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
2206; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002207; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002208; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002209;
Simon Pilgrim93986492017-04-18 19:04:40 +00002210; BTVER2-LABEL: test_movups:
2211; BTVER2: # BB#0:
2212; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
2213; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2214; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
2215; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002216;
2217; ZNVER1-LABEL: test_movups:
2218; ZNVER1: # BB#0:
2219; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50]
2220; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2221; ZNVER1-NEXT: vmovups %xmm0, (%rsi) # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002222; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002223 %1 = load <4 x float>, <4 x float> *%a0, align 1
2224 %2 = fadd <4 x float> %1, %1
2225 store <4 x float> %2, <4 x float> *%a1, align 1
2226 ret void
2227}
2228
; Scheduling test for (v)mulps in both register and memory-operand form.
; The IR multiplies %a0 by %a1, loads a <4 x float> through %a2 (align 16), and
; multiplies the first product by the loaded vector.
; Every -mcpu run is expected to emit a register-register (v)mulps followed by
; a (v)mulps that takes (%rdi) as a memory operand, then a retq.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2229define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2230; GENERIC-LABEL: test_mulps:
2231; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002232; GENERIC-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
2233; GENERIC-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
2234; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002235;
2236; ATOM-LABEL: test_mulps:
2237; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002238; ATOM-NEXT: mulps %xmm1, %xmm0 # sched: [5:5.00]
2239; ATOM-NEXT: mulps (%rdi), %xmm0 # sched: [10:5.00]
2240; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002241;
2242; SLM-LABEL: test_mulps:
2243; SLM: # BB#0:
2244; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00]
2245; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00]
2246; SLM-NEXT: retq # sched: [4:1.00]
2247;
2248; SANDY-LABEL: test_mulps:
2249; SANDY: # BB#0:
2250; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002251; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
2252; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002253;
2254; HASWELL-LABEL: test_mulps:
2255; HASWELL: # BB#0:
2256; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002257; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
2258; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002259;
Gadi Haber85d99b42017-10-17 13:45:39 +00002260; BROADWELL-LABEL: test_mulps:
2261; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002262; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2263; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
2264; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002265;
Gadi Haber767d98b2017-08-30 08:08:50 +00002266; SKYLAKE-LABEL: test_mulps:
2267; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002268; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002269; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2270; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002271;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002272; SKX-LABEL: test_mulps:
2273; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002274; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2275; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2276; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002277;
Simon Pilgrim93986492017-04-18 19:04:40 +00002278; BTVER2-LABEL: test_mulps:
2279; BTVER2: # BB#0:
2280; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
2281; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2282; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002283;
2284; ZNVER1-LABEL: test_mulps:
2285; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002286; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2287; ZNVER1-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2288; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002289 %1 = fmul <4 x float> %a0, %a1
2290 %2 = load <4 x float>, <4 x float> *%a2, align 16
2291 %3 = fmul <4 x float> %1, %2
2292 ret <4 x float> %3
2293}
2294
; test_mulss - scalar single-precision multiply.
; The body multiplies %a0 by %a1 and then multiplies that product by a float
; loaded from %a2, so each target's expected output is a register-register
; mulss/vmulss followed by one with a (%rdi) memory operand.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; NOTE(review): the Haswell expectations give the memory form the same
; [5:0.50] as the register form, whereas every other target here shows a larger
; first number for the memory form - possibly a scheduling-model gap; confirm.
2295define float @test_mulss(float %a0, float %a1, float *%a2) {
2296; GENERIC-LABEL: test_mulss:
2297; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002298; GENERIC-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
2299; GENERIC-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
2300; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002301;
2302; ATOM-LABEL: test_mulss:
2303; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002304; ATOM-NEXT: mulss %xmm1, %xmm0 # sched: [4:4.00]
2305; ATOM-NEXT: mulss (%rdi), %xmm0 # sched: [5:5.00]
2306; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002307;
2308; SLM-LABEL: test_mulss:
2309; SLM: # BB#0:
2310; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00]
2311; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00]
2312; SLM-NEXT: retq # sched: [4:1.00]
2313;
2314; SANDY-LABEL: test_mulss:
2315; SANDY: # BB#0:
2316; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002317; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
2318; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002319;
2320; HASWELL-LABEL: test_mulss:
2321; HASWELL: # BB#0:
2322; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002323; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
2324; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002325;
Gadi Haber85d99b42017-10-17 13:45:39 +00002326; BROADWELL-LABEL: test_mulss:
2327; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002328; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2329; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
2330; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002331;
Gadi Haber767d98b2017-08-30 08:08:50 +00002332; SKYLAKE-LABEL: test_mulss:
2333; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002334; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002335; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
2336; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002337;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002338; SKX-LABEL: test_mulss:
2339; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002340; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2341; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
2342; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002343;
Simon Pilgrim93986492017-04-18 19:04:40 +00002344; BTVER2-LABEL: test_mulss:
2345; BTVER2: # BB#0:
2346; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
2347; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2348; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002349;
2350; ZNVER1-LABEL: test_mulss:
2351; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002352; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2353; ZNVER1-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2354; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002355 %1 = fmul float %a0, %a1
2356 %2 = load float, float *%a2, align 4
2357 %3 = fmul float %1, %2
2358 ret float %3
2359}
2360
; test_orps - packed bitwise OR on <4 x float> operands.
; The body bitcasts %a0 and %a1 to <4 x i32>, ORs them, ORs the result with a
; vector loaded from %a2 (align 16), and bitcasts back to <4 x float>. Each
; target is expected to emit a register-register orps/vorps followed by one with
; a (%rdi) memory operand; the Atom expectations also contain four nops before
; the return.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; NOTE(review): the Haswell expectations give the memory form the same
; [1:1.00] as the register form, while Sandy Bridge, Broadwell, Skylake and SKX
; show 6 or 7 for the memory form - possibly a scheduling-model gap; confirm.
2361define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2362; GENERIC-LABEL: test_orps:
2363; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002364; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
2365; GENERIC-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
2366; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002367;
2368; ATOM-LABEL: test_orps:
2369; ATOM: # BB#0:
Simon Pilgrim486072d2017-08-01 17:51:20 +00002370; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
2371; ATOM-NEXT: orps (%rdi), %xmm0 # sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002372; ATOM-NEXT: nop # sched: [1:0.50]
2373; ATOM-NEXT: nop # sched: [1:0.50]
2374; ATOM-NEXT: nop # sched: [1:0.50]
2375; ATOM-NEXT: nop # sched: [1:0.50]
2376; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002377;
2378; SLM-LABEL: test_orps:
2379; SLM: # BB#0:
2380; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
2381; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00]
2382; SLM-NEXT: retq # sched: [4:1.00]
2383;
2384; SANDY-LABEL: test_orps:
2385; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002386; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
2387; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2388; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002389;
2390; HASWELL-LABEL: test_orps:
2391; HASWELL: # BB#0:
2392; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002393; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
2394; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002395;
Gadi Haber85d99b42017-10-17 13:45:39 +00002396; BROADWELL-LABEL: test_orps:
2397; BROADWELL: # BB#0:
2398; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002399; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
2400; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002401;
Gadi Haber767d98b2017-08-30 08:08:50 +00002402; SKYLAKE-LABEL: test_orps:
2403; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002404; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2405; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
2406; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002407;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002408; SKX-LABEL: test_orps:
2409; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002410; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2411; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
2412; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002413;
Simon Pilgrim93986492017-04-18 19:04:40 +00002414; BTVER2-LABEL: test_orps:
2415; BTVER2: # BB#0:
2416; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2417; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
2418; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002419;
2420; ZNVER1-LABEL: test_orps:
2421; ZNVER1: # BB#0:
2422; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2423; ZNVER1-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002424; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002425 %1 = bitcast <4 x float> %a0 to <4 x i32>
2426 %2 = bitcast <4 x float> %a1 to <4 x i32>
2427 %3 = or <4 x i32> %1, %2
2428 %4 = load <4 x float>, <4 x float> *%a2, align 16
2429 %5 = bitcast <4 x float> %4 to <4 x i32>
2430 %6 = or <4 x i32> %3, %5
2431 %7 = bitcast <4 x i32> %6 to <4 x float>
2432 ret <4 x float> %7
2433}
2434
; test_prefetchnta - non-temporal prefetch of the address in %a0.
; The body calls @llvm.prefetch with the constant arguments (0, 0, 1); every
; target is expected to emit a single `prefetchnta (%rdi)` before the return.
; The Atom expectations additionally contain six nops before the return.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; NOTE(review): the intrinsic is declared `readnone` after the function; that
; looks odd for a prefetch - presumably the attributes written on an intrinsic
; declaration are not what the compiler relies on; confirm before copying it.
2435define void @test_prefetchnta(i8* %a0) {
2436; GENERIC-LABEL: test_prefetchnta:
2437; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002438; GENERIC-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2439; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002440;
2441; ATOM-LABEL: test_prefetchnta:
2442; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002443; ATOM-NEXT: prefetchnta (%rdi) # sched: [1:1.00]
2444; ATOM-NEXT: nop # sched: [1:0.50]
2445; ATOM-NEXT: nop # sched: [1:0.50]
2446; ATOM-NEXT: nop # sched: [1:0.50]
2447; ATOM-NEXT: nop # sched: [1:0.50]
2448; ATOM-NEXT: nop # sched: [1:0.50]
2449; ATOM-NEXT: nop # sched: [1:0.50]
2450; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002451;
2452; SLM-LABEL: test_prefetchnta:
2453; SLM: # BB#0:
2454; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
2455; SLM-NEXT: retq # sched: [4:1.00]
2456;
2457; SANDY-LABEL: test_prefetchnta:
2458; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002459; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2460; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002461;
2462; HASWELL-LABEL: test_prefetchnta:
2463; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002464; HASWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
2465; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002466;
Gadi Haber85d99b42017-10-17 13:45:39 +00002467; BROADWELL-LABEL: test_prefetchnta:
2468; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002469; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2470; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002471;
Gadi Haber767d98b2017-08-30 08:08:50 +00002472; SKYLAKE-LABEL: test_prefetchnta:
2473; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002474; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2475; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002476;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002477; SKX-LABEL: test_prefetchnta:
2478; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002479; SKX-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2480; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002481;
Simon Pilgrim93986492017-04-18 19:04:40 +00002482; BTVER2-LABEL: test_prefetchnta:
2483; BTVER2: # BB#0:
2484; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00]
2485; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002486;
2487; ZNVER1-LABEL: test_prefetchnta:
2488; ZNVER1: # BB#0:
2489; ZNVER1-NEXT: prefetchnta (%rdi) # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002490; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002491 call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
2492 ret void
2493}
2494declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone
2495
; test_rcpps - packed reciprocal approximation via @llvm.x86.sse.rcp.ps.
; The body applies the intrinsic once to %a0 and once to a vector loaded from
; %a1 (align 16), then adds the two results. Each target is expected to emit
; one register-form and one (%rdi) memory-form rcpps/vrcpps plus an
; addps/vaddps; the order of the two rcpps instructions differs between
; targets, and Atom and SLM need an extra movaps to place the result in %xmm0.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; NOTE(review): the Haswell expectations give the memory form the same
; [5:1.00] as the register form, while the other AVX targets show a larger
; first number for the memory form - possibly a scheduling-model gap; confirm.
2496define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
2497; GENERIC-LABEL: test_rcpps:
2498; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002499; GENERIC-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
2500; GENERIC-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
2501; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2502; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002503;
2504; ATOM-LABEL: test_rcpps:
2505; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002506; ATOM-NEXT: rcpps (%rdi), %xmm1 # sched: [10:5.00]
2507; ATOM-NEXT: rcpps %xmm0, %xmm0 # sched: [9:4.50]
2508; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
2509; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
2510; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002511;
2512; SLM-LABEL: test_rcpps:
2513; SLM: # BB#0:
2514; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00]
2515; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00]
2516; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2517; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2518; SLM-NEXT: retq # sched: [4:1.00]
2519;
2520; SANDY-LABEL: test_rcpps:
2521; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00002522; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002523; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002524; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002525; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002526;
2527; HASWELL-LABEL: test_rcpps:
2528; HASWELL: # BB#0:
2529; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002530; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002531; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002532; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002533;
Gadi Haber85d99b42017-10-17 13:45:39 +00002534; BROADWELL-LABEL: test_rcpps:
2535; BROADWELL: # BB#0:
2536; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002537; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002538; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002539; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002540;
Gadi Haber767d98b2017-08-30 08:08:50 +00002541; SKYLAKE-LABEL: test_rcpps:
2542; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002543; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002544; SKYLAKE-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002545; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002546; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002547;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002548; SKX-LABEL: test_rcpps:
2549; SKX: # BB#0:
Craig Topper692c8ef2017-11-04 18:26:41 +00002550; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
2551; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002552; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2553; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002554;
Simon Pilgrim93986492017-04-18 19:04:40 +00002555; BTVER2-LABEL: test_rcpps:
2556; BTVER2: # BB#0:
2557; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
2558; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00]
2559; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2560; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002561;
2562; ZNVER1-LABEL: test_rcpps:
2563; ZNVER1: # BB#0:
2564; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50]
2565; ZNVER1-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:0.50]
2566; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002567; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002568 %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
2569 %2 = load <4 x float>, <4 x float> *%a1, align 16
2570 %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2)
2571 %4 = fadd <4 x float> %1, %3
2572 ret <4 x float> %4
2573}
2574declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
2575
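2576; TODO - rcpss_m (the memory-operand form of rcpss is not covered: in test_rcpss the load is emitted as a separate movss/vmovss and only the register form is reached)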
2577
; test_rcpss - scalar reciprocal approximation via @llvm.x86.sse.rcp.ss.
; The body inserts %a0 into lane 0 of an undef vector and applies the
; intrinsic, does the same with a float loaded from %a1 (align 4), and adds the
; two vector results. In every target's expected output the load appears as a
; separate movss/vmovss and both rcpss/vrcpss instructions are register forms.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; NOTE(review): the two Atom rcpss lines carry no "# sched" annotation, unlike
; every other instruction in this function - presumably the Atom model had no
; entry for them when the assertions were generated; confirm.
; NOTE(review): for SLM, btver2 and znver1 the register-form rcpss numbers
; (8, 7 and 12) equal the memory-form rcpps numbers in test_rcpps - possibly
; the register form is attributed to a load-class entry; confirm.
2578define <4 x float> @test_rcpss(float %a0, float *%a1) {
2579; GENERIC-LABEL: test_rcpss:
2580; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002581; GENERIC-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
2582; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2583; GENERIC-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
2584; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2585; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002586;
2587; ATOM-LABEL: test_rcpss:
2588; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002589; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002590; ATOM-NEXT: rcpss %xmm0, %xmm0
2591; ATOM-NEXT: rcpss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002592; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2593; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002594;
2595; SLM-LABEL: test_rcpss:
2596; SLM: # BB#0:
2597; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
2598; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [8:1.00]
2599; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [8:1.00]
2600; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2601; SLM-NEXT: retq # sched: [4:1.00]
2602;
2603; SANDY-LABEL: test_rcpss:
2604; SANDY: # BB#0:
Gadi Haberbed2c502017-08-13 13:59:24 +00002605; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002606; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
Gadi Haberbed2c502017-08-13 13:59:24 +00002607; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002608; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002609; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002610;
2611; HASWELL-LABEL: test_rcpss:
2612; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002613; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2614; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
2615; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002616; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002617; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002618;
Gadi Haber85d99b42017-10-17 13:45:39 +00002619; BROADWELL-LABEL: test_rcpss:
2620; BROADWELL: # BB#0:
2621; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002622; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00002623; BROADWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2624; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002625; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002626;
Gadi Haber767d98b2017-08-30 08:08:50 +00002627; SKYLAKE-LABEL: test_rcpss:
2628; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002629; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002630; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002631; SKYLAKE-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
2632; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002633; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002634;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002635; SKX-LABEL: test_rcpss:
2636; SKX: # BB#0:
2637; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002638; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002639; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002640; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2641; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002642;
Simon Pilgrim93986492017-04-18 19:04:40 +00002643; BTVER2-LABEL: test_rcpss:
2644; BTVER2: # BB#0:
2645; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
2646; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
2647; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
2648; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2649; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002650;
2651; ZNVER1-LABEL: test_rcpss:
2652; ZNVER1: # BB#0:
2653; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
2654; ZNVER1-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [12:0.50]
2655; ZNVER1-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [12:0.50]
2656; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002657; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002658 %1 = insertelement <4 x float> undef, float %a0, i32 0
2659 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1)
2660 %3 = load float, float *%a1, align 4
2661 %4 = insertelement <4 x float> undef, float %3, i32 0
2662 %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
2663 %6 = fadd <4 x float> %2, %5
2664 ret <4 x float> %6
2665}
2666declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
2667
; test_rsqrtps - packed reciprocal square-root approximation via
; @llvm.x86.sse.rsqrt.ps.
; The body applies the intrinsic once to %a0 and once to a vector loaded from
; %a1 (align 16), then adds the two results. Each target is expected to emit
; one register-form and one (%rdi) memory-form rsqrtps/vrsqrtps plus an
; addps/vaddps; the order of the two rsqrtps instructions differs between
; targets, and Atom and SLM need an extra movaps to place the result in %xmm0.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; NOTE(review): the Haswell expectations give the memory form the same
; [5:1.00] as the register form, while the other AVX targets show a larger
; first number for the memory form - possibly a scheduling-model gap; confirm.
2668define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
2669; GENERIC-LABEL: test_rsqrtps:
2670; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002671; GENERIC-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
2672; GENERIC-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
2673; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2674; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002675;
2676; ATOM-LABEL: test_rsqrtps:
2677; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002678; ATOM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [10:5.00]
2679; ATOM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [9:4.50]
2680; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
2681; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
2682; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002683;
2684; SLM-LABEL: test_rsqrtps:
2685; SLM: # BB#0:
2686; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00]
2687; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00]
2688; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2689; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2690; SLM-NEXT: retq # sched: [4:1.00]
2691;
2692; SANDY-LABEL: test_rsqrtps:
2693; SANDY: # BB#0:
2694; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002695; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002696; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002697; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002698;
2699; HASWELL-LABEL: test_rsqrtps:
2700; HASWELL: # BB#0:
2701; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002702; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002703; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002704; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002705;
Gadi Haber85d99b42017-10-17 13:45:39 +00002706; BROADWELL-LABEL: test_rsqrtps:
2707; BROADWELL: # BB#0:
2708; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002709; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002710; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002711; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002712;
Gadi Haber767d98b2017-08-30 08:08:50 +00002713; SKYLAKE-LABEL: test_rsqrtps:
2714; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002715; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002716; SKYLAKE-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002717; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002718; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002719;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002720; SKX-LABEL: test_rsqrtps:
2721; SKX: # BB#0:
Craig Topper692c8ef2017-11-04 18:26:41 +00002722; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
2723; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002724; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2725; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002726;
Simon Pilgrim93986492017-04-18 19:04:40 +00002727; BTVER2-LABEL: test_rsqrtps:
2728; BTVER2: # BB#0:
2729; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
2730; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00]
2731; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2732; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002733;
2734; ZNVER1-LABEL: test_rsqrtps:
2735; ZNVER1: # BB#0:
2736; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50]
2737; ZNVER1-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:0.50]
2738; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002739; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002740 %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
2741 %2 = load <4 x float>, <4 x float> *%a1, align 16
2742 %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2)
2743 %4 = fadd <4 x float> %1, %3
2744 ret <4 x float> %4
2745}
2746declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
2747
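2748; TODO - rsqrtss_m (the memory-operand form of rsqrtss is not covered: in test_rsqrtss the load is emitted as a separate movss/vmovss and only the register form is reached)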
2749
; test_rsqrtss - scalar reciprocal square-root approximation via
; @llvm.x86.sse.rsqrt.ss.
; The body inserts %a0 into lane 0 of an undef vector and applies the
; intrinsic, does the same with a float loaded from %a1 (align 4), and adds the
; two vector results. In every target's expected output the load appears as a
; separate movss/vmovss and both rsqrtss/vrsqrtss instructions are register
; forms.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; NOTE(review): the two Atom rsqrtss lines carry no "# sched" annotation,
; unlike every other instruction in this function - presumably the Atom model
; had no entry for them when the assertions were generated; confirm.
; NOTE(review): for SLM and btver2 the register-form rsqrtss numbers (8 and 7)
; equal the memory-form rsqrtps numbers in test_rsqrtps - possibly the register
; form is attributed to a load-class entry; confirm.
2750define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
2751; GENERIC-LABEL: test_rsqrtss:
2752; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002753; GENERIC-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
2754; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2755; GENERIC-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
2756; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2757; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002758;
2759; ATOM-LABEL: test_rsqrtss:
2760; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002761; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002762; ATOM-NEXT: rsqrtss %xmm0, %xmm0
2763; ATOM-NEXT: rsqrtss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002764; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2765; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002766;
2767; SLM-LABEL: test_rsqrtss:
2768; SLM: # BB#0:
2769; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
2770; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [8:1.00]
2771; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [8:1.00]
2772; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2773; SLM-NEXT: retq # sched: [4:1.00]
2774;
2775; SANDY-LABEL: test_rsqrtss:
2776; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002777; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2778; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2779; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002780; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002781; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002782;
2783; HASWELL-LABEL: test_rsqrtss:
2784; HASWELL: # BB#0:
2785; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002786; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002787; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2788; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002789; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002790;
Gadi Haber85d99b42017-10-17 13:45:39 +00002791; BROADWELL-LABEL: test_rsqrtss:
2792; BROADWELL: # BB#0:
2793; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002794; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00002795; BROADWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2796; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002797; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002798;
Gadi Haber767d98b2017-08-30 08:08:50 +00002799; SKYLAKE-LABEL: test_rsqrtss:
2800; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002801; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002802; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002803; SKYLAKE-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
2804; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002805; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002806;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002807; SKX-LABEL: test_rsqrtss:
2808; SKX: # BB#0:
2809; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002810; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002811; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002812; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2813; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002814;
Simon Pilgrim93986492017-04-18 19:04:40 +00002815; BTVER2-LABEL: test_rsqrtss:
2816; BTVER2: # BB#0:
2817; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
2818; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
2819; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
2820; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2821; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002822;
2823; ZNVER1-LABEL: test_rsqrtss:
2824; ZNVER1: # BB#0:
2825; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002826; ZNVER1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:0.50]
2827; ZNVER1-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:0.50]
Craig Topper106b5b62017-07-19 02:45:14 +00002828; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002829; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002830 %1 = insertelement <4 x float> undef, float %a0, i32 0
2831 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1)
2832 %3 = load float, float *%a1, align 4
2833 %4 = insertelement <4 x float> undef, float %3, i32 0
2834 %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
2835 %6 = fadd <4 x float> %2, %5
2836 ret <4 x float> %6
2837}
2838declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
2839
; test_sfence - store fence via @llvm.x86.sse.sfence.
; The body is a single call to the intrinsic with no arguments; every target
; is expected to emit one `sfence` before the return. The Atom expectations
; additionally contain six nops before the return.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; NOTE(review): the intrinsic is declared `readnone` after the function, which
; looks odd for a fence - presumably the attributes written on an intrinsic
; declaration are not what the compiler relies on; confirm before copying it.
2840define void @test_sfence() {
2841; GENERIC-LABEL: test_sfence:
2842; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002843; GENERIC-NEXT: sfence # sched: [1:1.00]
2844; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002845;
2846; ATOM-LABEL: test_sfence:
2847; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002848; ATOM-NEXT: sfence # sched: [1:1.00]
2849; ATOM-NEXT: nop # sched: [1:0.50]
2850; ATOM-NEXT: nop # sched: [1:0.50]
2851; ATOM-NEXT: nop # sched: [1:0.50]
2852; ATOM-NEXT: nop # sched: [1:0.50]
2853; ATOM-NEXT: nop # sched: [1:0.50]
2854; ATOM-NEXT: nop # sched: [1:0.50]
2855; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002856;
2857; SLM-LABEL: test_sfence:
2858; SLM: # BB#0:
2859; SLM-NEXT: sfence # sched: [1:1.00]
2860; SLM-NEXT: retq # sched: [4:1.00]
2861;
2862; SANDY-LABEL: test_sfence:
2863; SANDY: # BB#0:
2864; SANDY-NEXT: sfence # sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002865; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002866;
2867; HASWELL-LABEL: test_sfence:
2868; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00002869; HASWELL-NEXT: sfence # sched: [1:0.33]
2870; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002871;
Gadi Haber85d99b42017-10-17 13:45:39 +00002872; BROADWELL-LABEL: test_sfence:
2873; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002874; BROADWELL-NEXT: sfence # sched: [2:0.33]
2875; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002876;
Gadi Haber767d98b2017-08-30 08:08:50 +00002877; SKYLAKE-LABEL: test_sfence:
2878; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002879; SKYLAKE-NEXT: sfence # sched: [2:0.33]
2880; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002881;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002882; SKX-LABEL: test_sfence:
2883; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002884; SKX-NEXT: sfence # sched: [2:0.33]
2885; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002886;
Simon Pilgrim93986492017-04-18 19:04:40 +00002887; BTVER2-LABEL: test_sfence:
2888; BTVER2: # BB#0:
2889; BTVER2-NEXT: sfence # sched: [1:1.00]
2890; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002891;
2892; ZNVER1-LABEL: test_sfence:
2893; ZNVER1: # BB#0:
2894; ZNVER1-NEXT: sfence # sched: [1:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002895; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002896 call void @llvm.x86.sse.sfence()
2897 ret void
2898}
2899declare void @llvm.x86.sse.sfence() nounwind readnone
2900
; test_shufps - two shufflevector operations. The first combines %a0 and %a1
; with mask <0,0,4,4>; the second combines that result with a vector loaded
; from %a2 using mask <0,3,4,4>. Each expectation is a register-form shuffle
; followed by a memory-form shuffle and retq (shufps on the generic, atom and
; slm runs, vshufps on the others); atom also lists four nops before retq.
2901define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
2902; GENERIC-LABEL: test_shufps:
2903; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002904; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2905; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2906; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002907;
2908; ATOM-LABEL: test_shufps:
2909; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002910; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2911; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2912; ATOM-NEXT: nop # sched: [1:0.50]
2913; ATOM-NEXT: nop # sched: [1:0.50]
2914; ATOM-NEXT: nop # sched: [1:0.50]
2915; ATOM-NEXT: nop # sched: [1:0.50]
2916; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002917;
2918; SLM-LABEL: test_shufps:
2919; SLM: # BB#0:
2920; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2921; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [4:1.00]
2922; SLM-NEXT: retq # sched: [4:1.00]
2923;
2924; SANDY-LABEL: test_shufps:
2925; SANDY: # BB#0:
2926; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002927; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2928; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002929;
2930; HASWELL-LABEL: test_shufps:
2931; HASWELL: # BB#0:
2932; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00002933; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2934; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002935;
Gadi Haber85d99b42017-10-17 13:45:39 +00002936; BROADWELL-LABEL: test_shufps:
2937; BROADWELL: # BB#0:
2938; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002939; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
2940; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002941;
Gadi Haber767d98b2017-08-30 08:08:50 +00002942; SKYLAKE-LABEL: test_shufps:
2943; SKYLAKE: # BB#0:
2944; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002945; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2946; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00002947;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002948; SKX-LABEL: test_shufps:
2949; SKX: # BB#0:
2950; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002951; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2952; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002953;
Simon Pilgrim93986492017-04-18 19:04:40 +00002954; BTVER2-LABEL: test_shufps:
2955; BTVER2: # BB#0:
2956; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2957; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
2958; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00002959;
2960; ZNVER1-LABEL: test_shufps:
2961; ZNVER1: # BB#0:
2962; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2963; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00002964; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002965 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
2966 %2 = load <4 x float>, <4 x float> *%a2, align 16
2967 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
2968 ret <4 x float> %3
2969}
2970
; test_sqrtps - calls llvm.x86.sse.sqrt.ps on %a0 and on a vector loaded from
; %a1, then adds the two results with fadd. Each expectation contains one
; register-form and one memory-form (v)sqrtps plus an (v)addps; the order of
; the two square roots differs between runs, and the slm expectation has an
; extra movaps %xmm1, %xmm0 before retq.
2971define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
2972; GENERIC-LABEL: test_sqrtps:
2973; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00002974; GENERIC-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:1.00]
2975; GENERIC-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:1.00]
2976; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2977; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002978;
2979; ATOM-LABEL: test_sqrtps:
2980; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00002981; ATOM-NEXT: sqrtps %xmm0, %xmm1 # sched: [70:35.00]
2982; ATOM-NEXT: sqrtps (%rdi), %xmm0 # sched: [70:35.00]
2983; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2984; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00002985;
2986; SLM-LABEL: test_sqrtps:
2987; SLM: # BB#0:
2988; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [18:1.00]
2989; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [15:1.00]
2990; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2991; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2992; SLM-NEXT: retq # sched: [4:1.00]
2993;
2994; SANDY-LABEL: test_sqrtps:
2995; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00002996; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
2997; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00002998; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00002999; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003000;
3001; HASWELL-LABEL: test_sqrtps:
3002; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00003003; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
3004; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003005; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003006; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003007;
Gadi Haber85d99b42017-10-17 13:45:39 +00003008; BROADWELL-LABEL: test_sqrtps:
3009; BROADWELL: # BB#0:
3010; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003011; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003012; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003013; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003014;
Gadi Haber767d98b2017-08-30 08:08:50 +00003015; SKYLAKE-LABEL: test_sqrtps:
3016; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003017; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003018; SKYLAKE-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003019; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003020; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003021;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003022; SKX-LABEL: test_sqrtps:
3023; SKX: # BB#0:
3024; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003025; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:1.00]
3026; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3027; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003028;
Simon Pilgrim93986492017-04-18 19:04:40 +00003029; BTVER2-LABEL: test_sqrtps:
3030; BTVER2: # BB#0:
3031; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
3032; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00]
3033; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3034; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003035;
3036; ZNVER1-LABEL: test_sqrtps:
3037; ZNVER1: # BB#0:
3038; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:1.00]
3039; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:1.00]
3040; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003041; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003042 %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
3043 %2 = load <4 x float>, <4 x float> *%a1, align 16
3044 %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2)
3045 %4 = fadd <4 x float> %1, %3
3046 ret <4 x float> %4
3047}
3048declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
3049
3050; TODO - sqrtss_m (test_sqrtss below loads with a separate (v)movaps and only exercises the register form of sqrtss)
3051
; test_sqrtss - calls llvm.x86.sse.sqrt.ss on %a0 and on a <4 x float> loaded
; from %a1, then adds the two results with fadd. In every expectation the
; load is a separate (v)movaps and both square roots use the register form.
; NOTE(review): the atom sqrtss lines carry no "# sched" annotation, and the
; sandybridge/ivybridge vsqrtss lines record 114 where the other runs record
; values between 12 and 27 - worth confirming against the scheduler models.
3052define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
3053; GENERIC-LABEL: test_sqrtss:
3054; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003055; GENERIC-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:1.00]
3056; GENERIC-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
3057; GENERIC-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:1.00]
3058; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
3059; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003060;
3061; ATOM-LABEL: test_sqrtss:
3062; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003063; ATOM-NEXT: movaps (%rdi), %xmm1 # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003064; ATOM-NEXT: sqrtss %xmm0, %xmm0
3065; ATOM-NEXT: sqrtss %xmm1, %xmm1
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003066; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
3067; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003068;
3069; SLM-LABEL: test_sqrtss:
3070; SLM: # BB#0:
3071; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00]
3072; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [18:1.00]
3073; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [18:1.00]
3074; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
3075; SLM-NEXT: retq # sched: [4:1.00]
3076;
3077; SANDY-LABEL: test_sqrtss:
3078; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00003079; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
3080; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
3081; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003082; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003083; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003084;
3085; HASWELL-LABEL: test_sqrtss:
3086; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00003087; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
3088; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
3089; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003090; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003091; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003092;
Gadi Haber85d99b42017-10-17 13:45:39 +00003093; BROADWELL-LABEL: test_sqrtss:
3094; BROADWELL: # BB#0:
3095; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003096; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00003097; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
3098; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003099; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003100;
Gadi Haber767d98b2017-08-30 08:08:50 +00003101; SKYLAKE-LABEL: test_sqrtss:
3102; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003103; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003104; SKYLAKE-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003105; SKYLAKE-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00]
3106; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003107; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003108;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003109; SKX-LABEL: test_sqrtss:
3110; SKX: # BB#0:
3111; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003112; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003113; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003114; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3115; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003116;
Simon Pilgrim93986492017-04-18 19:04:40 +00003117; BTVER2-LABEL: test_sqrtss:
3118; BTVER2: # BB#0:
3119; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
3120; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [26:21.00]
3121; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [26:21.00]
3122; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3123; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003124;
3125; ZNVER1-LABEL: test_sqrtss:
3126; ZNVER1: # BB#0:
3127; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50]
3128; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [27:1.00]
3129; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [27:1.00]
3130; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003131; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003132 %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
3133 %2 = load <4 x float>, <4 x float> *%a1, align 16
3134 %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2)
3135 %4 = fadd <4 x float> %1, %3
3136 ret <4 x float> %4
3137}
3138declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
3139
; test_stmxcsr - allocates an i32 stack slot, passes it (bitcast to i8*) to
; llvm.x86.sse.stmxcsr, then loads the slot and returns the loaded value.
; Each expectation is (v)stmxcsr to a stack slot, movl from a stack slot into
; eax, and retq; the stack offset is matched with a regex, not a fixed value.
; NOTE(review): the znver1 vstmxcsr line records "[100:?]" - confirm this is
; the intended annotation.
3140define i32 @test_stmxcsr() {
3141; GENERIC-LABEL: test_stmxcsr:
3142; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003143; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
3144; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3145; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003146;
3147; ATOM-LABEL: test_stmxcsr:
3148; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003149; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [15:7.50]
3150; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:1.00]
3151; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003152;
3153; SLM-LABEL: test_stmxcsr:
3154; SLM: # BB#0:
3155; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
3156; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
3157; SLM-NEXT: retq # sched: [4:1.00]
3158;
3159; SANDY-LABEL: test_stmxcsr:
3160; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00003161; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
3162; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3163; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003164;
3165; HASWELL-LABEL: test_stmxcsr:
3166; HASWELL: # BB#0:
Gadi Haberd76f7b82017-08-28 10:04:16 +00003167; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
3168; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
3169; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003170;
Gadi Haber85d99b42017-10-17 13:45:39 +00003171; BROADWELL-LABEL: test_stmxcsr:
3172; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003173; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
3174; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3175; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003176;
Gadi Haber767d98b2017-08-30 08:08:50 +00003177; SKYLAKE-LABEL: test_stmxcsr:
3178; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003179; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
3180; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3181; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003182;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003183; SKX-LABEL: test_stmxcsr:
3184; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003185; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
3186; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3187; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003188;
Simon Pilgrim93986492017-04-18 19:04:40 +00003189; BTVER2-LABEL: test_stmxcsr:
3190; BTVER2: # BB#0:
3191; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
3192; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
3193; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003194;
3195; ZNVER1-LABEL: test_stmxcsr:
3196; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003197; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
Craig Topper106b5b62017-07-19 02:45:14 +00003198; ZNVER1-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003199; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003200 %1 = alloca i32, align 4
3201 %2 = bitcast i32* %1 to i8*
3202 call void @llvm.x86.sse.stmxcsr(i8* %2)
3203 %3 = load i32, i32* %1, align 4
3204 ret i32 %3
3205}
3206declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone
3207
; test_subps - subtracts %a1 from %a0 with fsub, then subtracts a vector
; loaded from %a2 from that result. Each expectation is a register-form
; (v)subps, a memory-form (v)subps reading (%rdi), and retq.
3208define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3209; GENERIC-LABEL: test_subps:
3210; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003211; GENERIC-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
3212; GENERIC-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
3213; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003214;
3215; ATOM-LABEL: test_subps:
3216; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003217; ATOM-NEXT: subps %xmm1, %xmm0 # sched: [5:5.00]
3218; ATOM-NEXT: subps (%rdi), %xmm0 # sched: [5:5.00]
3219; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003220;
3221; SLM-LABEL: test_subps:
3222; SLM: # BB#0:
3223; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
3224; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00]
3225; SLM-NEXT: retq # sched: [4:1.00]
3226;
3227; SANDY-LABEL: test_subps:
3228; SANDY: # BB#0:
3229; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003230; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
3231; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003232;
3233; HASWELL-LABEL: test_subps:
3234; HASWELL: # BB#0:
3235; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003236; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
3237; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003238;
Gadi Haber85d99b42017-10-17 13:45:39 +00003239; BROADWELL-LABEL: test_subps:
3240; BROADWELL: # BB#0:
3241; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003242; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3243; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003244;
Gadi Haber767d98b2017-08-30 08:08:50 +00003245; SKYLAKE-LABEL: test_subps:
3246; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003247; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003248; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
3249; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003250;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003251; SKX-LABEL: test_subps:
3252; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003253; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3254; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
3255; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003256;
Simon Pilgrim93986492017-04-18 19:04:40 +00003257; BTVER2-LABEL: test_subps:
3258; BTVER2: # BB#0:
3259; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3260; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3261; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003262;
3263; ZNVER1-LABEL: test_subps:
3264; ZNVER1: # BB#0:
3265; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3266; ZNVER1-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003267; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003268 %1 = fsub <4 x float> %a0, %a1
3269 %2 = load <4 x float>, <4 x float> *%a2, align 16
3270 %3 = fsub <4 x float> %1, %2
3271 ret <4 x float> %3
3272}
3273
; test_subss - scalar counterpart of the packed subtract test: subtracts %a1
; from %a0 with fsub on float, then subtracts a float loaded from %a2 from
; that result. Each expectation is a register-form (v)subss, a memory-form
; (v)subss reading (%rdi), and retq.
3274define float @test_subss(float %a0, float %a1, float *%a2) {
3275; GENERIC-LABEL: test_subss:
3276; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003277; GENERIC-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
3278; GENERIC-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
3279; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003280;
3281; ATOM-LABEL: test_subss:
3282; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003283; ATOM-NEXT: subss %xmm1, %xmm0 # sched: [5:5.00]
3284; ATOM-NEXT: subss (%rdi), %xmm0 # sched: [5:5.00]
3285; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003286;
3287; SLM-LABEL: test_subss:
3288; SLM: # BB#0:
3289; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
3290; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00]
3291; SLM-NEXT: retq # sched: [4:1.00]
3292;
3293; SANDY-LABEL: test_subss:
3294; SANDY: # BB#0:
3295; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003296; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
3297; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003298;
3299; HASWELL-LABEL: test_subss:
3300; HASWELL: # BB#0:
3301; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003302; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
3303; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003304;
Gadi Haber85d99b42017-10-17 13:45:39 +00003305; BROADWELL-LABEL: test_subss:
3306; BROADWELL: # BB#0:
3307; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003308; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3309; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003310;
Gadi Haber767d98b2017-08-30 08:08:50 +00003311; SKYLAKE-LABEL: test_subss:
3312; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003313; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003314; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
3315; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003316;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003317; SKX-LABEL: test_subss:
3318; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003319; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3320; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
3321; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003322;
Simon Pilgrim93986492017-04-18 19:04:40 +00003323; BTVER2-LABEL: test_subss:
3324; BTVER2: # BB#0:
3325; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3326; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3327; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003328;
3329; ZNVER1-LABEL: test_subss:
3330; ZNVER1: # BB#0:
3331; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3332; ZNVER1-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003333; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003334 %1 = fsub float %a0, %a1
3335 %2 = load float, float *%a2, align 4
3336 %3 = fsub float %1, %2
3337 ret float %3
3338}
3339
3340define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3341; GENERIC-LABEL: test_ucomiss:
3342; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003343; GENERIC-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +00003344; GENERIC-NEXT: setnp %al # sched: [1:0.50]
3345; GENERIC-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim84846982017-08-01 15:14:35 +00003346; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
3347; GENERIC-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +00003348; GENERIC-NEXT: setnp %al # sched: [1:0.50]
3349; GENERIC-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim84846982017-08-01 15:14:35 +00003350; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
3351; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
3352; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
3353; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003354;
3355; ATOM-LABEL: test_ucomiss:
3356; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003357; ATOM-NEXT: ucomiss %xmm1, %xmm0 # sched: [9:4.50]
3358; ATOM-NEXT: setnp %al # sched: [1:0.50]
3359; ATOM-NEXT: sete %cl # sched: [1:0.50]
3360; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
3361; ATOM-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:5.00]
3362; ATOM-NEXT: setnp %al # sched: [1:0.50]
3363; ATOM-NEXT: sete %dl # sched: [1:0.50]
3364; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
3365; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
3366; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
3367; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003368;
3369; SLM-LABEL: test_ucomiss:
3370; SLM: # BB#0:
3371; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
3372; SLM-NEXT: setnp %al # sched: [1:0.50]
3373; SLM-NEXT: sete %cl # sched: [1:0.50]
3374; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
3375; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00]
3376; SLM-NEXT: setnp %al # sched: [1:0.50]
3377; SLM-NEXT: sete %dl # sched: [1:0.50]
3378; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
3379; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
3380; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
3381; SLM-NEXT: retq # sched: [4:1.00]
3382;
3383; SANDY-LABEL: test_ucomiss:
3384; SANDY: # BB#0:
3385; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +00003386; SANDY-NEXT: setnp %al # sched: [1:0.50]
3387; SANDY-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003388; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
3389; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
Gadi Haberbed2c502017-08-13 13:59:24 +00003390; SANDY-NEXT: setnp %al # sched: [1:0.50]
3391; SANDY-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003392; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
3393; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
3394; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003395; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003396;
3397; HASWELL-LABEL: test_ucomiss:
3398; HASWELL: # BB#0:
3399; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00003400; HASWELL-NEXT: setnp %al # sched: [1:0.50]
3401; HASWELL-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003402; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
3403; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
Michael Zuckermanf6684002017-06-28 11:23:31 +00003404; HASWELL-NEXT: setnp %al # sched: [1:0.50]
3405; HASWELL-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003406; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
3407; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
3408; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003409; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003410;
Gadi Haber85d99b42017-10-17 13:45:39 +00003411; BROADWELL-LABEL: test_ucomiss:
3412; BROADWELL: # BB#0:
3413; BROADWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3414; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
3415; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
3416; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +00003417; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003418; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
3419; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
3420; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
3421; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
3422; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +00003423; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003424;
Gadi Haber767d98b2017-08-30 08:08:50 +00003425; SKYLAKE-LABEL: test_ucomiss:
3426; SKYLAKE: # BB#0:
3427; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003428; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
3429; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00003430; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003431; SKYLAKE-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003432; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
3433; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
Gadi Haber767d98b2017-08-30 08:08:50 +00003434; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
3435; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
3436; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003437; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003438;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003439; SKX-LABEL: test_ucomiss:
3440; SKX: # BB#0:
3441; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003442; SKX-NEXT: setnp %al # sched: [1:0.50]
3443; SKX-NEXT: sete %cl # sched: [1:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003444; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
3445; SKX-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003446; SKX-NEXT: setnp %al # sched: [1:0.50]
3447; SKX-NEXT: sete %dl # sched: [1:0.50]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003448; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
3449; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
3450; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00003451; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003452;
Simon Pilgrim93986492017-04-18 19:04:40 +00003453; BTVER2-LABEL: test_ucomiss:
3454; BTVER2: # BB#0:
3455; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3456; BTVER2-NEXT: setnp %al # sched: [1:0.50]
3457; BTVER2-NEXT: sete %cl # sched: [1:0.50]
3458; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
3459; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
3460; BTVER2-NEXT: setnp %al # sched: [1:0.50]
3461; BTVER2-NEXT: sete %dl # sched: [1:0.50]
3462; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
3463; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
3464; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
3465; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003466;
3467; ZNVER1-LABEL: test_ucomiss:
3468; ZNVER1: # BB#0:
3469; ZNVER1-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3470; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
3471; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
3472; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
3473; ZNVER1-NEXT: vucomiss (%rdi), %xmm0 # sched: [10:1.00]
3474; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
3475; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
3476; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
3477; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
3478; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003479; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003480 %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
3481 %2 = load <4 x float>, <4 x float> *%a2, align 4
3482 %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
3483 %4 = or i32 %1, %3
3484 ret i32 %4
3485}
; Declaration of the intrinsic that test_ucomiss calls twice: once with both
; operands passed as arguments and once with the second operand loaded from
; memory. It takes two <4 x float> values and returns an i32.
3486declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
3487
; test_unpckhps - expected scheduling annotations for the high-lane interleave
; shuffle. The body shuffles %a0 with %a1, then shuffles that result with a
; vector loaded from %a2, so every per-CPU block below expects two
; instructions: one with a register source and one with a memory source
; (printed as mem[...] in the decoded shuffle comment).
; The generic, Atom and SLM blocks expect unpckhps; the remaining blocks
; expect the v-prefixed form vunpckhps.
; The check lines are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; NOTE(review): the bracketed pair after "sched" is presumably
; [latency:reciprocal throughput] as printed by -print-schedule - confirm.
; NOTE(review): the Haswell block lists [1:1.00] for the memory form, the same
; as its register form, whereas the other non-Atom blocks list a larger first
; number for the memory form. These lines mirror llc output, so any correction
; presumably belongs in the scheduling model, not in this test - confirm.
3488define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3489; GENERIC-LABEL: test_unpckhps:
3490; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003491; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3492; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3493; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003494;
3495; ATOM-LABEL: test_unpckhps:
3496; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003497; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3498; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
3499; ATOM-NEXT: nop # sched: [1:0.50]
3500; ATOM-NEXT: nop # sched: [1:0.50]
3501; ATOM-NEXT: nop # sched: [1:0.50]
3502; ATOM-NEXT: nop # sched: [1:0.50]
3503; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003504;
3505; SLM-LABEL: test_unpckhps:
3506; SLM: # BB#0:
3507; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3508; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
3509; SLM-NEXT: retq # sched: [4:1.00]
3510;
3511; SANDY-LABEL: test_unpckhps:
3512; SANDY: # BB#0:
3513; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003514; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3515; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003516;
3517; HASWELL-LABEL: test_unpckhps:
3518; HASWELL: # BB#0:
3519; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003520; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
3521; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003522;
Gadi Haber85d99b42017-10-17 13:45:39 +00003523; BROADWELL-LABEL: test_unpckhps:
3524; BROADWELL: # BB#0:
3525; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003526; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
3527; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003528;
Gadi Haber767d98b2017-08-30 08:08:50 +00003529; SKYLAKE-LABEL: test_unpckhps:
3530; SKYLAKE: # BB#0:
3531; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003532; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3533; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003534;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003535; SKX-LABEL: test_unpckhps:
3536; SKX: # BB#0:
3537; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003538; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3539; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003540;
Simon Pilgrim93986492017-04-18 19:04:40 +00003541; BTVER2-LABEL: test_unpckhps:
3542; BTVER2: # BB#0:
3543; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
3544; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
3545; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003546;
3547; ZNVER1-LABEL: test_unpckhps:
3548; ZNVER1: # BB#0:
3549; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
3550; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003551; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask <2, 6, 3, 7> takes lanes 2 and 3 from each operand, alternating between
; the first operand (indices 0-3) and the second operand (indices 4-7).
Simon Pilgrim93986492017-04-18 19:04:40 +00003552 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; The 16-byte-aligned load feeds the second shuffle; the expected assembly
; above shows it as the memory operand of the second instruction.
3553 %2 = load <4 x float>, <4 x float> *%a2, align 16
3554 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
3555 ret <4 x float> %3
3556}
3557
; test_unpcklps - expected scheduling annotations for the low-lane interleave
; shuffle. The body shuffles %a0 with %a1, then shuffles that result with a
; vector loaded from %a2, so every per-CPU block below expects two
; instructions: one with a register source and one with a memory source
; (printed as mem[...] in the decoded shuffle comment).
; The generic, Atom and SLM blocks expect unpcklps; the remaining blocks
; expect the v-prefixed form vunpcklps.
; The check lines are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; NOTE(review): the bracketed pair after "sched" is presumably
; [latency:reciprocal throughput] as printed by -print-schedule - confirm.
; NOTE(review): the Haswell block lists [1:1.00] for the memory form, the same
; as its register form, whereas the other non-Atom blocks list a larger first
; number for the memory form. These lines mirror llc output, so any correction
; presumably belongs in the scheduling model, not in this test - confirm.
3558define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3559; GENERIC-LABEL: test_unpcklps:
3560; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003561; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3562; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3563; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003564;
3565; ATOM-LABEL: test_unpcklps:
3566; ATOM: # BB#0:
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003567; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3568; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
3569; ATOM-NEXT: nop # sched: [1:0.50]
3570; ATOM-NEXT: nop # sched: [1:0.50]
3571; ATOM-NEXT: nop # sched: [1:0.50]
3572; ATOM-NEXT: nop # sched: [1:0.50]
3573; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003574;
3575; SLM-LABEL: test_unpcklps:
3576; SLM: # BB#0:
3577; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3578; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [4:1.00]
3579; SLM-NEXT: retq # sched: [4:1.00]
3580;
3581; SANDY-LABEL: test_unpcklps:
3582; SANDY: # BB#0:
3583; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Gadi Haberf4d154c2017-07-10 09:53:16 +00003584; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3585; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003586;
3587; HASWELL-LABEL: test_unpcklps:
3588; HASWELL: # BB#0:
3589; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003590; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
3591; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003592;
Gadi Haber85d99b42017-10-17 13:45:39 +00003593; BROADWELL-LABEL: test_unpcklps:
3594; BROADWELL: # BB#0:
3595; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003596; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
3597; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003598;
Gadi Haber767d98b2017-08-30 08:08:50 +00003599; SKYLAKE-LABEL: test_unpcklps:
3600; SKYLAKE: # BB#0:
3601; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003602; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3603; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003604;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003605; SKX-LABEL: test_unpcklps:
3606; SKX: # BB#0:
3607; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003608; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3609; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003610;
Simon Pilgrim93986492017-04-18 19:04:40 +00003611; BTVER2-LABEL: test_unpcklps:
3612; BTVER2: # BB#0:
3613; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
3614; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
3615; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003616;
3617; ZNVER1-LABEL: test_unpcklps:
3618; ZNVER1: # BB#0:
3619; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
3620; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003621; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask <0, 4, 1, 5> takes lanes 0 and 1 from each operand, alternating between
; the first operand (indices 0-3) and the second operand (indices 4-7).
Simon Pilgrim93986492017-04-18 19:04:40 +00003622 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
; The 16-byte-aligned load feeds the second shuffle; the expected assembly
; above shows it as the memory operand of the second instruction.
3623 %2 = load <4 x float>, <4 x float> *%a2, align 16
3624 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
3625 ret <4 x float> %3
3626}
3627
; test_xorps - expected scheduling annotations for the packed-single XOR.
; The body XORs %a0 with %a1 and then XORs that result with a vector loaded
; from %a2, so every per-CPU block below expects two instructions: one with a
; register source and one with a (%rdi) memory source.
; The generic, Atom and SLM blocks expect the two-operand xorps; the remaining
; blocks expect the three-operand v-prefixed form vxorps.
; The check lines are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; NOTE(review): the bracketed pair after "sched" is presumably
; [latency:reciprocal throughput] as printed by -print-schedule - confirm.
; NOTE(review): the Haswell block lists [1:1.00] for the memory form, the same
; as its register form, whereas the other non-Atom blocks list a larger first
; number for the memory form. These lines mirror llc output, so any correction
; presumably belongs in the scheduling model, not in this test - confirm.
3628define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3629; GENERIC-LABEL: test_xorps:
3630; GENERIC: # BB#0:
Simon Pilgrim84846982017-08-01 15:14:35 +00003631; GENERIC-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
3632; GENERIC-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
3633; GENERIC-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003634;
3635; ATOM-LABEL: test_xorps:
3636; ATOM: # BB#0:
Simon Pilgrim486072d2017-08-01 17:51:20 +00003637; ATOM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
3638; ATOM-NEXT: xorps (%rdi), %xmm0 # sched: [1:1.00]
Andrew V. Tischenkod5659512017-08-01 09:15:43 +00003639; ATOM-NEXT: nop # sched: [1:0.50]
3640; ATOM-NEXT: nop # sched: [1:0.50]
3641; ATOM-NEXT: nop # sched: [1:0.50]
3642; ATOM-NEXT: nop # sched: [1:0.50]
3643; ATOM-NEXT: retq # sched: [79:39.50]
Simon Pilgrim93986492017-04-18 19:04:40 +00003644;
3645; SLM-LABEL: test_xorps:
3646; SLM: # BB#0:
3647; SLM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
3648; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00]
3649; SLM-NEXT: retq # sched: [4:1.00]
3650;
3651; SANDY-LABEL: test_xorps:
3652; SANDY: # BB#0:
Gadi Haberf4d154c2017-07-10 09:53:16 +00003653; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3654; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3655; SANDY-NEXT: retq # sched: [1:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003656;
3657; HASWELL-LABEL: test_xorps:
3658; HASWELL: # BB#0:
3659; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00003660; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
3661; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim93986492017-04-18 19:04:40 +00003662;
Gadi Haber85d99b42017-10-17 13:45:39 +00003663; BROADWELL-LABEL: test_xorps:
3664; BROADWELL: # BB#0:
3665; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003666; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
3667; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003668;
Gadi Haber767d98b2017-08-30 08:08:50 +00003669; SKYLAKE-LABEL: test_xorps:
3670; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003671; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
3672; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
3673; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00003674;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003675; SKX-LABEL: test_xorps:
3676; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003677; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
3678; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
3679; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003680;
Simon Pilgrim93986492017-04-18 19:04:40 +00003681; BTVER2-LABEL: test_xorps:
3682; BTVER2: # BB#0:
3683; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
3684; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
3685; BTVER2-NEXT: retq # sched: [4:1.00]
Craig Topper106b5b62017-07-19 02:45:14 +00003686;
3687; ZNVER1-LABEL: test_xorps:
3688; ZNVER1: # BB#0:
3689; ZNVER1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
3690; ZNVER1-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00003691; ZNVER1-NEXT: retq # sched: [1:0.50]
; The xor is carried out on <4 x i32> bitcasts of the float vectors; the
; expected assembly above still shows the float-domain xorps/vxorps.
Simon Pilgrim93986492017-04-18 19:04:40 +00003692 %1 = bitcast <4 x float> %a0 to <4 x i32>
3693 %2 = bitcast <4 x float> %a1 to <4 x i32>
3694 %3 = xor <4 x i32> %1, %2
; The 16-byte-aligned load feeds the second xor; the expected assembly above
; shows it as the (%rdi) memory operand of the second instruction.
3695 %4 = load <4 x float>, <4 x float> *%a2, align 16
3696 %5 = bitcast <4 x float> %4 to <4 x i32>
3697 %6 = xor <4 x i32> %3, %5
; Cast the integer result back to the function's <4 x float> return type.
3698 %7 = bitcast <4 x i32> %6 to <4 x float>
3699 ret <4 x float> %7
3700}
3701
; Metadata node holding the single constant i32 1. Nothing in the last four
; tests of this file refers to !0.
; NOTE(review): presumably attached to an instruction in an earlier test -
; confirm its users before changing or removing it.
3702!0 = !{i32 1}