blob: f463f87f8f425e9da539de521f8d70d17428e554 [file] [log] [blame]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
Simon Pilgrim84846982017-08-01 15:14:35 +00002; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
Simon Pilgrim946f08c2017-05-06 13:46:09 +00003; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
Gadi Haber767d98b2017-08-30 08:08:50 +00004; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
7
; test_broadcasti128: loads a <4 x i32> from %a1, repeats its four elements in
; both halves of an <8 x i32> with a shufflevector, and adds %a0 to the result.
; The expectations for every tested -mcpu are a memory-source vbroadcasti128
; followed by a vpaddd, each with that CPU model's sched annotation.
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00008define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
9; GENERIC-LABEL: test_broadcasti128:
10; GENERIC: # BB#0:
11; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]
12; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
13; GENERIC-NEXT: retq # sched: [1:1.00]
14;
15; HASWELL-LABEL: test_broadcasti128:
16; HASWELL: # BB#0:
17; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50]
18; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
19; HASWELL-NEXT: retq # sched: [2:1.00]
20;
21; SKYLAKE-LABEL: test_broadcasti128:
22; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +000023; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
24; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
25; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000026;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000027; SKX-LABEL: test_broadcasti128:
28; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +000029; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
30; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
31; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000032;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000033; ZNVER1-LABEL: test_broadcasti128:
34; ZNVER1: # BB#0:
35; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50]
36; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
37; ZNVER1-NEXT: retq # sched: [1:0.50]
38 %1 = load <4 x i32>, <4 x i32> *%a1, align 16
39 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
40 %3 = add <8 x i32> %2, %a0
41 ret <8 x i32> %3
42}
43
; test_broadcastsd_ymm: splats element 0 of the <2 x double> argument into all
; four lanes of a <4 x double> (zeroinitializer shuffle mask) and adds the
; splat to itself. The expected code is a register-form vbroadcastsd from
; %xmm0 to %ymm0 followed by vaddpd.
Simon Pilgrim5a931c62017-09-12 11:17:01 +000044define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) {
45; GENERIC-LABEL: test_broadcastsd_ymm:
46; GENERIC: # BB#0:
47; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00]
48; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
49; GENERIC-NEXT: retq # sched: [1:1.00]
50;
51; HASWELL-LABEL: test_broadcastsd_ymm:
52; HASWELL: # BB#0:
53; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
54; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
55; HASWELL-NEXT: retq # sched: [2:1.00]
56;
57; SKYLAKE-LABEL: test_broadcastsd_ymm:
58; SKYLAKE: # BB#0:
59; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +000060; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +000061; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +000062;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000063; SKX-LABEL: test_broadcastsd_ymm:
64; SKX: # BB#0:
65; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +000066; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
67; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000068;
Simon Pilgrim5a931c62017-09-12 11:17:01 +000069; ZNVER1-LABEL: test_broadcastsd_ymm:
70; ZNVER1: # BB#0:
71; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25]
72; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
73; ZNVER1-NEXT: retq # sched: [1:0.50]
74 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
75 %2 = fadd <4 x double> %1, %1
76 ret <4 x double> %2
77}
78
; test_broadcastss: splats element 0 of the <4 x float> argument across all
; four lanes (zeroinitializer shuffle mask) and adds the splat to itself.
; The expected code is the 128-bit register form of vbroadcastss followed by
; vaddps on %xmm0.
79define <4 x float> @test_broadcastss(<4 x float> %a0) {
80; GENERIC-LABEL: test_broadcastss:
81; GENERIC: # BB#0:
82; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
83; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
84; GENERIC-NEXT: retq # sched: [1:1.00]
85;
86; HASWELL-LABEL: test_broadcastss:
87; HASWELL: # BB#0:
88; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
89; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
90; HASWELL-NEXT: retq # sched: [2:1.00]
91;
92; SKYLAKE-LABEL: test_broadcastss:
93; SKYLAKE: # BB#0:
94; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +000095; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +000096; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +000097;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000098; SKX-LABEL: test_broadcastss:
99; SKX: # BB#0:
100; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000101; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
102; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000103;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000104; ZNVER1-LABEL: test_broadcastss:
105; ZNVER1: # BB#0:
106; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50]
107; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
108; ZNVER1-NEXT: retq # sched: [1:0.50]
109 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
110 %2 = fadd <4 x float> %1, %1
111 ret <4 x float> %2
112}
113
; test_broadcastss_ymm: splats element 0 of the <4 x float> argument into all
; eight lanes of an <8 x float> (zeroinitializer shuffle mask) and adds the
; splat to itself. The expected code is vbroadcastss from %xmm0 to %ymm0
; followed by a 256-bit vaddps.
114define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) {
115; GENERIC-LABEL: test_broadcastss_ymm:
116; GENERIC: # BB#0:
117; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00]
118; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
119; GENERIC-NEXT: retq # sched: [1:1.00]
120;
121; HASWELL-LABEL: test_broadcastss_ymm:
122; HASWELL: # BB#0:
123; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
124; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
125; HASWELL-NEXT: retq # sched: [2:1.00]
126;
127; SKYLAKE-LABEL: test_broadcastss_ymm:
128; SKYLAKE: # BB#0:
129; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000130; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000131; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000132;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000133; SKX-LABEL: test_broadcastss_ymm:
134; SKX: # BB#0:
135; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000136; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
137; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000138;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000139; ZNVER1-LABEL: test_broadcastss_ymm:
140; ZNVER1: # BB#0:
141; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25]
142; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
143; ZNVER1-NEXT: retq # sched: [1:0.50]
144 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
145 %2 = fadd <8 x float> %1, %1
146 ret <8 x float> %2
147}
148
; test_extracti128: computes %a0 + %a1 and %a0 - %a1, takes elements 4..7 of
; each result, stores the upper half of the sum through %a2 and returns the
; upper half of the difference. The expected code uses vextracti128 $1 in both
; its register-destination and memory-destination forms, and a vzeroupper
; before the return.
149define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) {
150; GENERIC-LABEL: test_extracti128:
151; GENERIC: # BB#0:
152; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [3:1.00]
153; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
154; GENERIC-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [1:1.00]
155; GENERIC-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
156; GENERIC-NEXT: vzeroupper
157; GENERIC-NEXT: retq # sched: [1:1.00]
158;
159; HASWELL-LABEL: test_extracti128:
160; HASWELL: # BB#0:
161; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
162; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
163; HASWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
164; HASWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
165; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
166; HASWELL-NEXT: retq # sched: [2:1.00]
167;
168; SKYLAKE-LABEL: test_extracti128:
169; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000170; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
171; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000172; SKYLAKE-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
173; SKYLAKE-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
174; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000175; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000176;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000177; SKX-LABEL: test_extracti128:
178; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000179; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
180; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000181; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
182; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
183; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000184; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000185;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000186; ZNVER1-LABEL: test_extracti128:
187; ZNVER1: # BB#0:
188; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.25]
189; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
190; ZNVER1-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [2:0.25]
191; ZNVER1-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:0.50]
192; ZNVER1-NEXT: vzeroupper # sched: [100:?]
193; ZNVER1-NEXT: retq # sched: [1:0.50]
194 %1 = add <8 x i32> %a0, %a1
195 %2 = sub <8 x i32> %a0, %a1
196 %3 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
197 %4 = shufflevector <8 x i32> %2, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
198 store <4 x i32> %3, <4 x i32> *%a2
199 ret <4 x i32> %4
200}
201
; test_gatherdpd: forwards its four arguments to the 128-bit
; llvm.x86.avx2.gather.d.pd intrinsic (<4 x i32> index vector) with a trailing
; i8 immediate of 2 and returns the result. The immediate appears as the scale
; in the expected addressing mode (%rdi,%xmm1,2). The generic-CPU expectation
; for the gather carries no sched annotation.
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000202define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
203; GENERIC-LABEL: test_gatherdpd:
204; GENERIC: # BB#0:
205; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
206; GENERIC-NEXT: retq # sched: [1:1.00]
207;
208; HASWELL-LABEL: test_gatherdpd:
209; HASWELL: # BB#0:
210; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
211; HASWELL-NEXT: retq # sched: [2:1.00]
212;
213; SKYLAKE-LABEL: test_gatherdpd:
214; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000215; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
216; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000217;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000218; SKX-LABEL: test_gatherdpd:
219; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000220; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
221; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000222;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000223; ZNVER1-LABEL: test_gatherdpd:
224; ZNVER1: # BB#0:
225; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
226; ZNVER1-NEXT: retq # sched: [1:0.50]
227 %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2)
228 ret <2 x double> %1
229}
230declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly
231
; test_gatherdpd_ymm: forwards its four arguments to the 256-bit
; llvm.x86.avx2.gather.d.pd.256 intrinsic (<4 x i32> index vector) with a
; trailing i8 immediate of 8 and returns the result. The immediate appears as
; the scale in the expected addressing mode (%rdi,%xmm1,8); the index stays in
; an xmm register while the other vector operands are ymm registers.
232define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
233; GENERIC-LABEL: test_gatherdpd_ymm:
234; GENERIC: # BB#0:
235; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0
236; GENERIC-NEXT: retq # sched: [1:1.00]
237;
238; HASWELL-LABEL: test_gatherdpd_ymm:
239; HASWELL: # BB#0:
240; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [1:?]
241; HASWELL-NEXT: retq # sched: [2:1.00]
242;
243; SKYLAKE-LABEL: test_gatherdpd_ymm:
244; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000245; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
246; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000247;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000248; SKX-LABEL: test_gatherdpd_ymm:
249; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000250; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
251; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000252;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000253; ZNVER1-LABEL: test_gatherdpd_ymm:
254; ZNVER1: # BB#0:
255; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:?]
256; ZNVER1-NEXT: retq # sched: [1:0.50]
257 %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8)
258 ret <4 x double> %1
259}
260declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly
261
; test_gatherdps: forwards its four arguments to the 128-bit
; llvm.x86.avx2.gather.d.ps intrinsic (<4 x i32> index vector) with a trailing
; i8 immediate of 2 and returns the result. The immediate appears as the scale
; in the expected addressing mode (%rdi,%xmm1,2).
262define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
263; GENERIC-LABEL: test_gatherdps:
264; GENERIC: # BB#0:
265; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
266; GENERIC-NEXT: retq # sched: [1:1.00]
267;
268; HASWELL-LABEL: test_gatherdps:
269; HASWELL: # BB#0:
270; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
271; HASWELL-NEXT: retq # sched: [2:1.00]
272;
273; SKYLAKE-LABEL: test_gatherdps:
274; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000275; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
276; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000277;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000278; SKX-LABEL: test_gatherdps:
279; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000280; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
281; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000282;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000283; ZNVER1-LABEL: test_gatherdps:
284; ZNVER1: # BB#0:
285; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
286; ZNVER1-NEXT: retq # sched: [1:0.50]
287 %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2)
288 ret <4 x float> %1
289}
290declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly
291
; test_gatherdps_ymm: forwards its four arguments to the 256-bit
; llvm.x86.avx2.gather.d.ps.256 intrinsic (<8 x i32> index vector) with a
; trailing i8 immediate of 4 and returns the result. The immediate appears as
; the scale in the expected addressing mode (%rdi,%ymm1,4).
292define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
293; GENERIC-LABEL: test_gatherdps_ymm:
294; GENERIC: # BB#0:
295; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0
296; GENERIC-NEXT: retq # sched: [1:1.00]
297;
298; HASWELL-LABEL: test_gatherdps_ymm:
299; HASWELL: # BB#0:
300; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [1:?]
301; HASWELL-NEXT: retq # sched: [2:1.00]
302;
303; SKYLAKE-LABEL: test_gatherdps_ymm:
304; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000305; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
306; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000307;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000308; SKX-LABEL: test_gatherdps_ymm:
309; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000310; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
311; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000312;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000313; ZNVER1-LABEL: test_gatherdps_ymm:
314; ZNVER1: # BB#0:
315; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:?]
316; ZNVER1-NEXT: retq # sched: [1:0.50]
317 %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4)
318 ret <8 x float> %1
319}
320declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly
321
; test_gatherqpd: forwards its four arguments to the 128-bit
; llvm.x86.avx2.gather.q.pd intrinsic (<2 x i64> index vector) with a trailing
; i8 immediate of 2 and returns the result. The immediate appears as the scale
; in the expected addressing mode (%rdi,%xmm1,2).
322define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
323; GENERIC-LABEL: test_gatherqpd:
324; GENERIC: # BB#0:
325; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
326; GENERIC-NEXT: retq # sched: [1:1.00]
327;
328; HASWELL-LABEL: test_gatherqpd:
329; HASWELL: # BB#0:
330; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
331; HASWELL-NEXT: retq # sched: [2:1.00]
332;
333; SKYLAKE-LABEL: test_gatherqpd:
334; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000335; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
336; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000337;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000338; SKX-LABEL: test_gatherqpd:
339; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000340; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
341; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000342;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000343; ZNVER1-LABEL: test_gatherqpd:
344; ZNVER1: # BB#0:
345; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
346; ZNVER1-NEXT: retq # sched: [1:0.50]
347 %1 = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2)
348 ret <2 x double> %1
349}
350declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly
351
; test_gatherqpd_ymm: forwards its four arguments to the 256-bit
; llvm.x86.avx2.gather.q.pd.256 intrinsic (<4 x i64> index vector) with a
; trailing i8 immediate of 8 and returns the result. The immediate appears as
; the scale in the expected addressing mode (%rdi,%ymm1,8).
352define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
353; GENERIC-LABEL: test_gatherqpd_ymm:
354; GENERIC: # BB#0:
355; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0
356; GENERIC-NEXT: retq # sched: [1:1.00]
357;
358; HASWELL-LABEL: test_gatherqpd_ymm:
359; HASWELL: # BB#0:
360; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [1:?]
361; HASWELL-NEXT: retq # sched: [2:1.00]
362;
363; SKYLAKE-LABEL: test_gatherqpd_ymm:
364; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000365; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
366; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000367;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000368; SKX-LABEL: test_gatherqpd_ymm:
369; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000370; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
371; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000372;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000373; ZNVER1-LABEL: test_gatherqpd_ymm:
374; ZNVER1: # BB#0:
375; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:?]
376; ZNVER1-NEXT: retq # sched: [1:0.50]
377 %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8)
378 ret <4 x double> %1
379}
380declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly
381
; test_gatherqps: forwards its four arguments to the 128-bit
; llvm.x86.avx2.gather.q.ps intrinsic (<2 x i64> index vector, <4 x float>
; data) with a trailing i8 immediate of 2 and returns the result. The
; immediate appears as the scale in the expected addressing mode
; (%rdi,%xmm1,2).
382define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
383; GENERIC-LABEL: test_gatherqps:
384; GENERIC: # BB#0:
385; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
386; GENERIC-NEXT: retq # sched: [1:1.00]
387;
388; HASWELL-LABEL: test_gatherqps:
389; HASWELL: # BB#0:
390; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
391; HASWELL-NEXT: retq # sched: [2:1.00]
392;
393; SKYLAKE-LABEL: test_gatherqps:
394; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000395; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
396; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000397;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000398; SKX-LABEL: test_gatherqps:
399; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000400; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
401; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000402;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000403; ZNVER1-LABEL: test_gatherqps:
404; ZNVER1: # BB#0:
405; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
406; ZNVER1-NEXT: retq # sched: [1:0.50]
407 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2)
408 ret <4 x float> %1
409}
410declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly
411
; test_gatherqps_ymm: forwards its four arguments to the
; llvm.x86.avx2.gather.q.ps.256 intrinsic (<4 x i64> index vector, <4 x float>
; data and result) with a trailing i8 immediate of 4 and returns the result.
; The expected gather uses a ymm index with xmm data operands,
; (%rdi,%ymm1,4), and is followed by a vzeroupper before the return.
412define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
413; GENERIC-LABEL: test_gatherqps_ymm:
414; GENERIC: # BB#0:
415; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0
416; GENERIC-NEXT: vzeroupper
417; GENERIC-NEXT: retq # sched: [1:1.00]
418;
419; HASWELL-LABEL: test_gatherqps_ymm:
420; HASWELL: # BB#0:
421; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [1:?]
422; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
423; HASWELL-NEXT: retq # sched: [2:1.00]
424;
425; SKYLAKE-LABEL: test_gatherqps_ymm:
426; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000427; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000428; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000429; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000430;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000431; SKX-LABEL: test_gatherqps_ymm:
432; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000433; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000434; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000435; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000436;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000437; ZNVER1-LABEL: test_gatherqps_ymm:
438; ZNVER1: # BB#0:
439; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:?]
440; ZNVER1-NEXT: vzeroupper # sched: [100:?]
441; ZNVER1-NEXT: retq # sched: [1:0.50]
442 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4)
443 ret <4 x float> %1
444}
445declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly
446
; test_inserti128: builds two <8 x i32> values that keep elements 0..3 of %a0
; and take their upper four elements from a widened <4 x i32> -- once from the
; %a1 argument and once from a vector loaded through %a2 -- then adds the two.
; The expected code uses vinserti128 $1 in its register-source and
; memory-source forms followed by vpaddd.
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000447define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
448; GENERIC-LABEL: test_inserti128:
449; GENERIC: # BB#0:
450; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
451; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
452; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
453; GENERIC-NEXT: retq # sched: [1:1.00]
454;
455; HASWELL-LABEL: test_inserti128:
456; HASWELL: # BB#0:
457; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
458; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
459; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
460; HASWELL-NEXT: retq # sched: [2:1.00]
461;
462; SKYLAKE-LABEL: test_inserti128:
463; SKYLAKE: # BB#0:
464; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000465; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
466; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
467; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000468;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000469; SKX-LABEL: test_inserti128:
470; SKX: # BB#0:
471; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000472; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
473; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
474; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000475;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000476; ZNVER1-LABEL: test_inserti128:
477; ZNVER1: # BB#0:
478; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25]
479; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
480; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
481; ZNVER1-NEXT: retq # sched: [1:0.50]
482 %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
483 %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
484 %3 = load <4 x i32>, <4 x i32> *%a2, align 16
485 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
486 %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
487 %6 = add <8 x i32> %2, %5
488 ret <8 x i32> %6
489}
490
; test_movntdqa: calls the llvm.x86.avx2.movntdqa intrinsic on the i8* argument
; and returns the resulting <4 x i64>. The expected code is a single
; vmovntdqa from (%rdi) into %ymm0.
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000491define <4 x i64> @test_movntdqa(i8* %a0) {
492; GENERIC-LABEL: test_movntdqa:
493; GENERIC: # BB#0:
494; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [4:0.50]
495; GENERIC-NEXT: retq # sched: [1:1.00]
496;
497; HASWELL-LABEL: test_movntdqa:
498; HASWELL: # BB#0:
499; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50]
500; HASWELL-NEXT: retq # sched: [2:1.00]
501;
502; SKYLAKE-LABEL: test_movntdqa:
503; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000504; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
505; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000506;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000507; SKX-LABEL: test_movntdqa:
508; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000509; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
510; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000511;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000512; ZNVER1-LABEL: test_movntdqa:
513; ZNVER1: # BB#0:
514; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50]
515; ZNVER1-NEXT: retq # sched: [1:0.50]
516 %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0)
517 ret <4 x i64> %1
518}
519declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
520
; test_mpsadbw: calls llvm.x86.avx2.mpsadbw on %a0 and %a1 with immediate 7,
; bitcasts the <16 x i16> result back to <32 x i8>, and calls the intrinsic
; again with a vector loaded through %a2 as the second operand. The expected
; code uses vmpsadbw $7 in its register-source and memory-source forms.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000521define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
522; GENERIC-LABEL: test_mpsadbw:
523; GENERIC: # BB#0:
524; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
525; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
526; GENERIC-NEXT: retq # sched: [1:1.00]
527;
528; HASWELL-LABEL: test_mpsadbw:
529; HASWELL: # BB#0:
530; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
531; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
532; HASWELL-NEXT: retq # sched: [2:1.00]
533;
534; SKYLAKE-LABEL: test_mpsadbw:
535; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000536; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000537; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
538; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000539;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000540; SKX-LABEL: test_mpsadbw:
541; SKX: # BB#0:
542; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000543; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
544; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000545;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000546; ZNVER1-LABEL: test_mpsadbw:
547; ZNVER1: # BB#0:
548; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:?]
549; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:?]
550; ZNVER1-NEXT: retq # sched: [1:0.50]
551 %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
552 %2 = bitcast <16 x i16> %1 to <32 x i8>
553 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
554 %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7)
555 ret <16 x i16> %4
556}
557declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
558
; test_pabsb: applies llvm.x86.avx2.pabs.b to %a0 and to a <32 x i8> loaded
; through %a1, then ORs the two results. The expected code uses vpabsb in its
; register-source and memory-source forms followed by vpor. In the znver1
; expectations the memory-source vpabsb comes first; the other CPUs list the
; register form first.
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000559define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000560; GENERIC-LABEL: test_pabsb:
561; GENERIC: # BB#0:
562; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00]
563; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:1.00]
564; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
565; GENERIC-NEXT: retq # sched: [1:1.00]
566;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000567; HASWELL-LABEL: test_pabsb:
568; HASWELL: # BB#0:
569; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000570; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000571; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000572; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000573;
Gadi Haber767d98b2017-08-30 08:08:50 +0000574; SKYLAKE-LABEL: test_pabsb:
575; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000576; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
577; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
578; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
579; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000580;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000581; SKX-LABEL: test_pabsb:
582; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000583; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
584; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
585; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
586; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000587;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000588; ZNVER1-LABEL: test_pabsb:
589; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +0000590; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
591; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
592; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000593; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000594 %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
595 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
596 %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
597 %4 = or <32 x i8> %1, %3
598 ret <32 x i8> %4
599}
600declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
601
; test_pabsd: calls @llvm.x86.avx2.pabs.d once on the register argument %a0 and
; once on a <8 x i32> loaded from %a1, then ORs the two results. The checks
; expect a register-form vpabsd and a vpabsd with the load folded into its
; operand, each with its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
602define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000603; GENERIC-LABEL: test_pabsd:
604; GENERIC: # BB#0:
605; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00]
606; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:1.00]
607; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
608; GENERIC-NEXT: retq # sched: [1:1.00]
609;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000610; HASWELL-LABEL: test_pabsd:
611; HASWELL: # BB#0:
612; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000613; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000614; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000615; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000616;
Gadi Haber767d98b2017-08-30 08:08:50 +0000617; SKYLAKE-LABEL: test_pabsd:
618; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000619; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
620; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
621; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
622; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000623;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000624; SKX-LABEL: test_pabsd:
625; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000626; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
627; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
628; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
629; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000630;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000631; ZNVER1-LABEL: test_pabsd:
632; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +0000633; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
634; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
635; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000636; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000637 %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
638 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
639 %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
640 %4 = or <8 x i32> %1, %3
641 ret <8 x i32> %4
642}
643declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
644
; test_pabsw: calls @llvm.x86.avx2.pabs.w once on the register argument %a0 and
; once on a <16 x i16> loaded from %a1, then ORs the two results. The checks
; expect a register-form vpabsw and a vpabsw with the load folded into its
; operand, each with its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
645define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000646; GENERIC-LABEL: test_pabsw:
647; GENERIC: # BB#0:
648; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00]
649; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:1.00]
650; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
651; GENERIC-NEXT: retq # sched: [1:1.00]
652;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000653; HASWELL-LABEL: test_pabsw:
654; HASWELL: # BB#0:
655; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000656; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000657; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000658; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000659;
Gadi Haber767d98b2017-08-30 08:08:50 +0000660; SKYLAKE-LABEL: test_pabsw:
661; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000662; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
663; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
664; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
665; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000666;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000667; SKX-LABEL: test_pabsw:
668; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000669; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
670; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
671; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
672; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000673;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000674; ZNVER1-LABEL: test_pabsw:
675; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +0000676; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
677; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
678; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000679; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000680 %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
681 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
682 %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
683 %4 = or <16 x i16> %1, %3
684 ret <16 x i16> %4
685}
686declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
687
; test_packssdw: packs %a0 and %a1 with @llvm.x86.avx2.packssdw, bitcasts the
; <16 x i16> result back to <8 x i32>, and packs it again with a <8 x i32>
; loaded from %a2. The checks expect a register-form vpackssdw followed by a
; vpackssdw with the load folded into its operand, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000688define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
689; GENERIC-LABEL: test_packssdw:
690; GENERIC: # BB#0:
691; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
692; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
693; GENERIC-NEXT: retq # sched: [1:1.00]
694;
695; HASWELL-LABEL: test_packssdw:
696; HASWELL: # BB#0:
697; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
698; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
699; HASWELL-NEXT: retq # sched: [2:1.00]
700;
701; SKYLAKE-LABEL: test_packssdw:
702; SKYLAKE: # BB#0:
703; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000704; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
705; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000706;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000707; SKX-LABEL: test_packssdw:
708; SKX: # BB#0:
709; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000710; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
711; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000712;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000713; ZNVER1-LABEL: test_packssdw:
714; ZNVER1: # BB#0:
715; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
716; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
717; ZNVER1-NEXT: retq # sched: [1:0.50]
718 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
719 %2 = bitcast <16 x i16> %1 to <8 x i32>
720 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
721 %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3)
722 ret <16 x i16> %4
723}
724declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
725
; test_packsswb: packs %a0 and %a1 with @llvm.x86.avx2.packsswb, bitcasts the
; <32 x i8> result back to <16 x i16>, and packs it again with a <16 x i16>
; loaded from %a2. The checks expect a register-form vpacksswb followed by a
; vpacksswb with the load folded into its operand, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
726define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
727; GENERIC-LABEL: test_packsswb:
728; GENERIC: # BB#0:
729; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
730; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
731; GENERIC-NEXT: retq # sched: [1:1.00]
732;
733; HASWELL-LABEL: test_packsswb:
734; HASWELL: # BB#0:
735; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
736; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
737; HASWELL-NEXT: retq # sched: [2:1.00]
738;
739; SKYLAKE-LABEL: test_packsswb:
740; SKYLAKE: # BB#0:
741; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000742; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
743; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000744;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000745; SKX-LABEL: test_packsswb:
746; SKX: # BB#0:
747; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000748; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
749; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000750;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000751; ZNVER1-LABEL: test_packsswb:
752; ZNVER1: # BB#0:
753; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
754; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
755; ZNVER1-NEXT: retq # sched: [1:0.50]
756 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
757 %2 = bitcast <32 x i8> %1 to <16 x i16>
758 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
759 %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3)
760 ret <32 x i8> %4
761}
762declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
763
; test_packusdw: packs %a0 and %a1 with @llvm.x86.avx2.packusdw, bitcasts the
; <16 x i16> result back to <8 x i32>, and packs it again with a <8 x i32>
; loaded from %a2. The checks expect a register-form vpackusdw followed by a
; vpackusdw with the load folded into its operand, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
764define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
765; GENERIC-LABEL: test_packusdw:
766; GENERIC: # BB#0:
767; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
768; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
769; GENERIC-NEXT: retq # sched: [1:1.00]
770;
771; HASWELL-LABEL: test_packusdw:
772; HASWELL: # BB#0:
773; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
774; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
775; HASWELL-NEXT: retq # sched: [2:1.00]
776;
777; SKYLAKE-LABEL: test_packusdw:
778; SKYLAKE: # BB#0:
779; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000780; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
781; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000782;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000783; SKX-LABEL: test_packusdw:
784; SKX: # BB#0:
785; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000786; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
787; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000788;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000789; ZNVER1-LABEL: test_packusdw:
790; ZNVER1: # BB#0:
791; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
792; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
793; ZNVER1-NEXT: retq # sched: [1:0.50]
794 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
795 %2 = bitcast <16 x i16> %1 to <8 x i32>
796 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
797 %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3)
798 ret <16 x i16> %4
799}
800declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
801
; test_packuswb: packs %a0 and %a1 with @llvm.x86.avx2.packuswb, bitcasts the
; <32 x i8> result back to <16 x i16>, and packs it again with a <16 x i16>
; loaded from %a2. The checks expect a register-form vpackuswb followed by a
; vpackuswb with the load folded into its operand, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
802define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
803; GENERIC-LABEL: test_packuswb:
804; GENERIC: # BB#0:
805; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
806; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
807; GENERIC-NEXT: retq # sched: [1:1.00]
808;
809; HASWELL-LABEL: test_packuswb:
810; HASWELL: # BB#0:
811; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
812; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
813; HASWELL-NEXT: retq # sched: [2:1.00]
814;
815; SKYLAKE-LABEL: test_packuswb:
816; SKYLAKE: # BB#0:
817; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000818; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
819; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000820;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000821; SKX-LABEL: test_packuswb:
822; SKX: # BB#0:
823; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000824; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
825; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000826;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000827; ZNVER1-LABEL: test_packuswb:
828; ZNVER1: # BB#0:
829; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
830; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
831; ZNVER1-NEXT: retq # sched: [1:0.50]
832 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
833 %2 = bitcast <32 x i8> %1 to <16 x i16>
834 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
835 %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3)
836 ret <32 x i8> %4
837}
838declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
839
; test_paddb: adds %a0 and %a1 as <32 x i8> with a plain IR `add`, then adds a
; <32 x i8> loaded from %a2 to that sum. The checks expect a register-form
; vpaddb followed by a vpaddb with the load folded into its operand, each with
; its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000840define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000841; GENERIC-LABEL: test_paddb:
842; GENERIC: # BB#0:
843; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
844; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
845; GENERIC-NEXT: retq # sched: [1:1.00]
846;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000847; HASWELL-LABEL: test_paddb:
848; HASWELL: # BB#0:
849; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000850; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
851; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000852;
Gadi Haber767d98b2017-08-30 08:08:50 +0000853; SKYLAKE-LABEL: test_paddb:
854; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000855; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
856; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
857; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000858;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000859; SKX-LABEL: test_paddb:
860; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000861; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
862; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
863; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000864;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000865; ZNVER1-LABEL: test_paddb:
866; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +0000867; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
868; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000869; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000870 %1 = add <32 x i8> %a0, %a1
871 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
872 %3 = add <32 x i8> %1, %2
873 ret <32 x i8> %3
874}
875
; test_paddd: adds %a0 and %a1 as <8 x i32> with a plain IR `add`, then adds a
; <8 x i32> loaded from %a2 to that sum. The checks expect a register-form
; vpaddd followed by a vpaddd with the load folded into its operand, each with
; its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
876define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000877; GENERIC-LABEL: test_paddd:
878; GENERIC: # BB#0:
879; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
880; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
881; GENERIC-NEXT: retq # sched: [1:1.00]
882;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000883; HASWELL-LABEL: test_paddd:
884; HASWELL: # BB#0:
885; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000886; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
887; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000888;
Gadi Haber767d98b2017-08-30 08:08:50 +0000889; SKYLAKE-LABEL: test_paddd:
890; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000891; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
892; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
893; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000894;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000895; SKX-LABEL: test_paddd:
896; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000897; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
898; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
899; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000900;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000901; ZNVER1-LABEL: test_paddd:
902; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +0000903; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
904; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000905; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000906 %1 = add <8 x i32> %a0, %a1
907 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
908 %3 = add <8 x i32> %1, %2
909 ret <8 x i32> %3
910}
911
; test_paddq: adds %a0 and %a1 as <4 x i64> with a plain IR `add`, then adds a
; <4 x i64> loaded from %a2 to that sum. The checks expect a register-form
; vpaddq followed by a vpaddq with the load folded into its operand, each with
; its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
912define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000913; GENERIC-LABEL: test_paddq:
914; GENERIC: # BB#0:
915; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
916; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
917; GENERIC-NEXT: retq # sched: [1:1.00]
918;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000919; HASWELL-LABEL: test_paddq:
920; HASWELL: # BB#0:
921; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000922; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
923; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000924;
Gadi Haber767d98b2017-08-30 08:08:50 +0000925; SKYLAKE-LABEL: test_paddq:
926; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000927; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
928; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
929; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000930;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000931; SKX-LABEL: test_paddq:
932; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000933; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
934; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
935; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000936;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000937; ZNVER1-LABEL: test_paddq:
938; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +0000939; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
940; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000941; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000942 %1 = add <4 x i64> %a0, %a1
943 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
944 %3 = add <4 x i64> %1, %2
945 ret <4 x i64> %3
946}
947
; test_paddsb: calls @llvm.x86.avx2.padds.b on %a0 and %a1, then calls it again
; on that result and a <32 x i8> loaded from %a2. The checks expect a
; register-form vpaddsb followed by a vpaddsb with the load folded into its
; operand, each with its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000948define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
949; GENERIC-LABEL: test_paddsb:
950; GENERIC: # BB#0:
951; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
952; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
953; GENERIC-NEXT: retq # sched: [1:1.00]
954;
955; HASWELL-LABEL: test_paddsb:
956; HASWELL: # BB#0:
957; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
958; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
959; HASWELL-NEXT: retq # sched: [2:1.00]
960;
961; SKYLAKE-LABEL: test_paddsb:
962; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000963; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
964; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
965; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000966;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000967; SKX-LABEL: test_paddsb:
968; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000969; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
970; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
971; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000972;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000973; ZNVER1-LABEL: test_paddsb:
974; ZNVER1: # BB#0:
975; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
976; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
977; ZNVER1-NEXT: retq # sched: [1:0.50]
978 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1)
979 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
980 %3 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %1, <32 x i8> %2)
981 ret <32 x i8> %3
982}
983declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
984
; test_paddsw: calls @llvm.x86.avx2.padds.w on %a0 and %a1, then calls it again
; on that result and a <16 x i16> loaded from %a2. The checks expect a
; register-form vpaddsw followed by a vpaddsw with the load folded into its
; operand, each with its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
985define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
986; GENERIC-LABEL: test_paddsw:
987; GENERIC: # BB#0:
988; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
989; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
990; GENERIC-NEXT: retq # sched: [1:1.00]
991;
992; HASWELL-LABEL: test_paddsw:
993; HASWELL: # BB#0:
994; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
995; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
996; HASWELL-NEXT: retq # sched: [2:1.00]
997;
998; SKYLAKE-LABEL: test_paddsw:
999; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001000; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1001; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1002; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001003;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001004; SKX-LABEL: test_paddsw:
1005; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001006; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1007; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1008; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001009;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001010; ZNVER1-LABEL: test_paddsw:
1011; ZNVER1: # BB#0:
1012; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1013; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1014; ZNVER1-NEXT: retq # sched: [1:0.50]
1015 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1)
1016 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1017 %3 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %1, <16 x i16> %2)
1018 ret <16 x i16> %3
1019}
1020declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
1021
; test_paddusb: calls @llvm.x86.avx2.paddus.b on %a0 and %a1, then calls it
; again on that result and a <32 x i8> loaded from %a2. The checks expect a
; register-form vpaddusb followed by a vpaddusb with the load folded into its
; operand, each with its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
1022define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1023; GENERIC-LABEL: test_paddusb:
1024; GENERIC: # BB#0:
1025; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1026; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1027; GENERIC-NEXT: retq # sched: [1:1.00]
1028;
1029; HASWELL-LABEL: test_paddusb:
1030; HASWELL: # BB#0:
1031; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1032; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1033; HASWELL-NEXT: retq # sched: [2:1.00]
1034;
1035; SKYLAKE-LABEL: test_paddusb:
1036; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001037; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1038; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1039; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001040;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001041; SKX-LABEL: test_paddusb:
1042; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001043; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1044; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1045; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001046;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001047; ZNVER1-LABEL: test_paddusb:
1048; ZNVER1: # BB#0:
1049; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1050; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1051; ZNVER1-NEXT: retq # sched: [1:0.50]
1052 %1 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1)
1053 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1054 %3 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %1, <32 x i8> %2)
1055 ret <32 x i8> %3
1056}
1057declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
1058
; test_paddusw: calls @llvm.x86.avx2.paddus.w on %a0 and %a1, then calls it
; again on that result and a <16 x i16> loaded from %a2. The checks expect a
; register-form vpaddusw followed by a vpaddusw with the load folded into its
; operand, each with its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
1059define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1060; GENERIC-LABEL: test_paddusw:
1061; GENERIC: # BB#0:
1062; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1063; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1064; GENERIC-NEXT: retq # sched: [1:1.00]
1065;
1066; HASWELL-LABEL: test_paddusw:
1067; HASWELL: # BB#0:
1068; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1069; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1070; HASWELL-NEXT: retq # sched: [2:1.00]
1071;
1072; SKYLAKE-LABEL: test_paddusw:
1073; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001074; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1075; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1076; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001077;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001078; SKX-LABEL: test_paddusw:
1079; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001080; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1081; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1082; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001083;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001084; ZNVER1-LABEL: test_paddusw:
1085; ZNVER1: # BB#0:
1086; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1087; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1088; ZNVER1-NEXT: retq # sched: [1:0.50]
1089 %1 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1)
1090 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1091 %3 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %1, <16 x i16> %2)
1092 ret <16 x i16> %3
1093}
1094declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
1095
; test_paddw: adds %a0 and %a1 as <16 x i16> with a plain IR `add`, then adds a
; <16 x i16> loaded from %a2 to that sum. The checks expect a register-form
; vpaddw followed by a vpaddw with the load folded into its operand, each with
; its `sched:` annotation, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001096define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001097; GENERIC-LABEL: test_paddw:
1098; GENERIC: # BB#0:
1099; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1100; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1101; GENERIC-NEXT: retq # sched: [1:1.00]
1102;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001103; HASWELL-LABEL: test_paddw:
1104; HASWELL: # BB#0:
1105; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001106; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1107; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001108;
Gadi Haber767d98b2017-08-30 08:08:50 +00001109; SKYLAKE-LABEL: test_paddw:
1110; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001111; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1112; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1113; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001114;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001115; SKX-LABEL: test_paddw:
1116; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001117; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1118; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1119; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001120;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001121; ZNVER1-LABEL: test_paddw:
1122; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00001123; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1124; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001125; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001126 %1 = add <16 x i16> %a0, %a1
1127 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1128 %3 = add <16 x i16> %1, %2
1129 ret <16 x i16> %3
1130}
1131
; test_palignr: two chained shufflevectors with the same mask. The mask takes
; elements 1-15 and 17-31 from the first operand and elements 0 and 16 of the
; second operand (mask indices 32 and 48). The first shuffle uses %a1 and %a0;
; the second uses a <32 x i8> loaded from %a2 and the first shuffle's result.
; The checks expect a register-form vpalignr followed by a vpalignr reading
; from memory, for every CPU prefix.
; The check lines are autogenerated (see the NOTE on the first line of the file).
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001132define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1133; GENERIC-LABEL: test_palignr:
1134; GENERIC: # BB#0:
1135; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1136; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [5:1.00]
1137; GENERIC-NEXT: retq # sched: [1:1.00]
1138;
1139; HASWELL-LABEL: test_palignr:
1140; HASWELL: # BB#0:
1141; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1142; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1143; HASWELL-NEXT: retq # sched: [2:1.00]
1144;
1145; SKYLAKE-LABEL: test_palignr:
1146; SKYLAKE: # BB#0:
1147; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001148; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00]
1149; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001150;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001151; SKX-LABEL: test_palignr:
1152; SKX: # BB#0:
1153; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001154; SKX-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00]
1155; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001156;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001157; ZNVER1-LABEL: test_palignr:
1158; ZNVER1: # BB#0:
1159; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25]
1160; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:0.50]
1161; ZNVER1-NEXT: retq # sched: [1:0.50]
1162 %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
1163 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1164 %3 = shufflevector <32 x i8> %2, <32 x i8> %1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
1165 ret <32 x i8> %3
1166}
1167
; test_pand: ANDs %a0 with %a1, ANDs that with a <4 x i64> loaded from %a2, and
; finally adds %a1 to the result. The checks expect a register-form vpand, a
; vpand with the load folded into its operand, and a trailing vpaddq, each with
; its `sched:` annotation, for every CPU prefix.
; NOTE(review): the trailing add is presumably there to keep the logic ops in
; the integer domain (vpand rather than a floating-point form) - confirm.
; The check lines are autogenerated (see the NOTE on the first line of the file).
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001168define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001169; GENERIC-LABEL: test_pand:
1170; GENERIC: # BB#0:
1171; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1172; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
1173; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1174; GENERIC-NEXT: retq # sched: [1:1.00]
1175;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001176; HASWELL-LABEL: test_pand:
1177; HASWELL: # BB#0:
1178; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001179; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001180; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001181; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001182;
Gadi Haber767d98b2017-08-30 08:08:50 +00001183; SKYLAKE-LABEL: test_pand:
1184; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001185; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1186; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1187; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1188; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001189;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001190; SKX-LABEL: test_pand:
1191; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001192; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1193; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1194; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1195; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001196;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001197; ZNVER1-LABEL: test_pand:
1198; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00001199; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1200; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1201; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001202; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001203 %1 = and <4 x i64> %a0, %a1
1204 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
1205 %3 = and <4 x i64> %1, %2
1206 %4 = add <4 x i64> %3, %a1
1207 ret <4 x i64> %4
1208}
1209
; Scheduling test for the 256-bit vpandn instruction. The IR builds two
; "and with complement" expressions: (~%a0 & %a1) in registers, and
; (~that_result & load(%a2)) with a memory operand. The check lines below
; expect a vpandn for each, followed by a vpaddq that sums the two results.
1210define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001211; GENERIC-LABEL: test_pandn:
1212; GENERIC: # BB#0:
1213; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1214; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:1.00]
1215; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1216; GENERIC-NEXT: retq # sched: [1:1.00]
1217;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001218; HASWELL-LABEL: test_pandn:
1219; HASWELL: # BB#0:
1220; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001221; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001222; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001223; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001224;
Gadi Haber767d98b2017-08-30 08:08:50 +00001225; SKYLAKE-LABEL: test_pandn:
1226; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001227; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1228; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1229; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1230; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001231;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001232; SKX-LABEL: test_pandn:
1233; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001234; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1235; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1236; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1237; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001238;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001239; ZNVER1-LABEL: test_pandn:
1240; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00001241; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1242; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1243; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001244; ZNVER1-NEXT: retq # sched: [1:0.50]
; The xor with all-ones is the IR spelling of bitwise NOT; each xor+and pair
; is what the checks above expect to be matched as a single vpandn.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001245 %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
1246 %2 = and <4 x i64> %a1, %1
1247 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
1248 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
1249 %5 = and <4 x i64> %3, %4
1250 %6 = add <4 x i64> %2, %5
1251 ret <4 x i64> %6
1252}
1253
; Scheduling test for the 256-bit vpavgb instruction. The IR computes a
; rounding byte average twice: first of %a0 and %a1 (register form), then of
; that result and a vector loaded from %a2 (memory form). The check lines
; below expect each average to be selected as one vpavgb.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001254define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1255; GENERIC-LABEL: test_pavgb:
1256; GENERIC: # BB#0:
1257; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1258; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1259; GENERIC-NEXT: retq # sched: [1:1.00]
1260;
1261; HASWELL-LABEL: test_pavgb:
1262; HASWELL: # BB#0:
1263; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1264; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1265; HASWELL-NEXT: retq # sched: [2:1.00]
1266;
1267; SKYLAKE-LABEL: test_pavgb:
1268; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001269; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1270; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1271; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001272;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001273; SKX-LABEL: test_pavgb:
1274; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001275; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1276; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1277; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001278;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001279; ZNVER1-LABEL: test_pavgb:
1280; ZNVER1: # BB#0:
1281; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1282; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1283; ZNVER1-NEXT: retq # sched: [1:0.50]
; Average idiom: widen both inputs to i16, compute (x + y + 1) >> 1, then
; truncate back to i8. The sequence appears once per expected vpavgb.
1284 %1 = zext <32 x i8> %a0 to <32 x i16>
1285 %2 = zext <32 x i8> %a1 to <32 x i16>
1286 %3 = add <32 x i16> %1, %2
1287 %4 = add <32 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1288 %5 = lshr <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1289 %6 = trunc <32 x i16> %5 to <32 x i8>
1290 %7 = load <32 x i8>, <32 x i8> *%a2, align 32
1291 %8 = zext <32 x i8> %6 to <32 x i16>
1292 %9 = zext <32 x i8> %7 to <32 x i16>
1293 %10 = add <32 x i16> %8, %9
1294 %11 = add <32 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1295 %12 = lshr <32 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1296 %13 = trunc <32 x i16> %12 to <32 x i8>
1297 ret <32 x i8> %13
1298}
1299
; Scheduling test for the 256-bit vpavgw instruction. The IR computes a
; rounding 16-bit average twice: first of %a0 and %a1 (register form), then of
; that result and a vector loaded from %a2 (memory form). The check lines
; below expect each average to be selected as one vpavgw.
1300define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1301; GENERIC-LABEL: test_pavgw:
1302; GENERIC: # BB#0:
1303; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1304; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1305; GENERIC-NEXT: retq # sched: [1:1.00]
1306;
1307; HASWELL-LABEL: test_pavgw:
1308; HASWELL: # BB#0:
1309; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1310; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1311; HASWELL-NEXT: retq # sched: [2:1.00]
1312;
1313; SKYLAKE-LABEL: test_pavgw:
1314; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001315; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1316; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1317; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001318;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001319; SKX-LABEL: test_pavgw:
1320; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001321; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1322; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1323; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001324;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001325; ZNVER1-LABEL: test_pavgw:
1326; ZNVER1: # BB#0:
1327; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1328; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1329; ZNVER1-NEXT: retq # sched: [1:0.50]
; Average idiom: widen both inputs to i32, compute (x + y + 1) >> 1, then
; truncate back to i16. The sequence appears once per expected vpavgw.
1330 %1 = zext <16 x i16> %a0 to <16 x i32>
1331 %2 = zext <16 x i16> %a1 to <16 x i32>
1332 %3 = add <16 x i32> %1, %2
1333 %4 = add <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1334 %5 = lshr <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1335 %6 = trunc <16 x i32> %5 to <16 x i16>
1336 %7 = load <16 x i16>, <16 x i16> *%a2, align 32
1337 %8 = zext <16 x i16> %6 to <16 x i32>
1338 %9 = zext <16 x i16> %7 to <16 x i32>
1339 %10 = add <16 x i32> %8, %9
1340 %11 = add <16 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1341 %12 = lshr <16 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1342 %13 = trunc <16 x i32> %12 to <16 x i16>
1343 ret <16 x i16> %13
1344}
1345
; Scheduling test for the 128-bit vpblendd instruction. Two lane-preserving
; shuffles are used: one mixing %a0 with %a1 (register form) and one mixing
; that result with a vector loaded from %a2 (memory form). The check lines
; below expect a vpblendd for each, followed by a vpaddd with %a0.
1346define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1347; GENERIC-LABEL: test_pblendd:
1348; GENERIC: # BB#0:
1349; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1350; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [5:0.50]
1351; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1352; GENERIC-NEXT: retq # sched: [1:1.00]
1353;
1354; HASWELL-LABEL: test_pblendd:
1355; HASWELL: # BB#0:
1356; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1357; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50]
1358; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1359; HASWELL-NEXT: retq # sched: [2:1.00]
1360;
1361; SKYLAKE-LABEL: test_pblendd:
1362; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001363; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1364; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1365; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1366; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001367;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001368; SKX-LABEL: test_pblendd:
1369; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001370; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1371; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1372; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1373; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001374;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001375; ZNVER1-LABEL: test_pblendd:
1376; ZNVER1: # BB#0:
1377; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1378; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00]
1379; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1380; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 4..7 select from the second shuffle operand. The first mask
; takes lanes 0-2 from %a1 and lane 3 from %a0; the second takes lanes 0 and 2
; from the loaded vector and lanes 1 and 3 from the first blend.
; NOTE(review): the final add presumably keeps the blends in the integer
; domain so vpblendd is chosen - confirm before removing.
1381 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
1382 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
1383 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1384 %4 = add <4 x i32> %a0, %3
1385 ret <4 x i32> %4
1386}
1387
; Scheduling test for the 256-bit vpblendd instruction. Two lane-preserving
; shuffles are used: one mixing %a0 with %a1 (register form) and one mixing
; that result with a vector loaded from %a2 (memory form). The check lines
; below expect a vpblendd for each, followed by a vpaddd with %a0.
1388define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
1389; GENERIC-LABEL: test_pblendd_ymm:
1390; GENERIC: # BB#0:
1391; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1392; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [5:0.50]
1393; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1394; GENERIC-NEXT: retq # sched: [1:1.00]
1395;
1396; HASWELL-LABEL: test_pblendd_ymm:
1397; HASWELL: # BB#0:
1398; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1399; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50]
1400; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1401; HASWELL-NEXT: retq # sched: [2:1.00]
1402;
1403; SKYLAKE-LABEL: test_pblendd_ymm:
1404; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001405; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1406; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1407; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1408; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001409;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001410; SKX-LABEL: test_pblendd_ymm:
1411; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001412; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1413; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1414; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1415; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001416;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001417; ZNVER1-LABEL: test_pblendd_ymm:
1418; ZNVER1: # BB#0:
1419; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1420; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50]
1421; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1422; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 8..15 select from the second shuffle operand. The first mask
; takes lanes 0-2 and 7 from %a1 and lanes 3-6 from %a0; the second replaces
; only lanes 1 and 2 with the loaded vector.
; NOTE(review): the final add presumably keeps the blends in the integer
; domain so vpblendd is chosen - confirm before removing.
1423 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
1424 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
1425 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
1426 %4 = add <8 x i32> %a0, %3
1427 ret <8 x i32> %4
1428}
1429
; Scheduling test for the 256-bit vpblendvb instruction, driven directly by
; the llvm.x86.avx2.pblendvb intrinsic. The first call blends %a0 and %a1
; under mask %a2 (register form); the second blends that result with a vector
; loaded from %a3 under mask %a4 (memory form). The check lines below expect
; one vpblendvb per call.
1430define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) {
1431; GENERIC-LABEL: test_pblendvb:
1432; GENERIC: # BB#0:
1433; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
1434; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
1435; GENERIC-NEXT: retq # sched: [1:1.00]
1436;
1437; HASWELL-LABEL: test_pblendvb:
1438; HASWELL: # BB#0:
1439; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
1440; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:2.00]
1441; HASWELL-NEXT: retq # sched: [2:1.00]
1442;
1443; SKYLAKE-LABEL: test_pblendvb:
1444; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001445; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001446; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1447; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001448;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001449; SKX-LABEL: test_pblendvb:
1450; SKX: # BB#0:
1451; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
Gadi Haber684944b2017-10-08 12:52:54 +00001452; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1453; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001454;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001455; ZNVER1-LABEL: test_pblendvb:
1456; ZNVER1: # BB#0:
1457; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1458; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
1459; ZNVER1-NEXT: retq # sched: [1:0.50]
1460 %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2)
1461 %2 = load <32 x i8>, <32 x i8> *%a3, align 32
1462 %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4)
1463 ret <32 x i8> %3
1464}
; Declaration of the intrinsic used by test_pblendvb above.
1465declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
1466
; Scheduling test for the 256-bit vpblendw instruction. Two lane-preserving
; shuffles are used: one mixing %a0 with %a1 (register form) and one mixing
; that result with a vector loaded from %a2 (memory form). The check lines
; below expect one vpblendw per shuffle.
1467define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1468; GENERIC-LABEL: test_pblendw:
1469; GENERIC: # BB#0:
1470; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
1471; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [5:0.50]
1472; GENERIC-NEXT: retq # sched: [1:1.00]
1473;
1474; HASWELL-LABEL: test_pblendw:
1475; HASWELL: # BB#0:
1476; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1477; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [4:1.00]
1478; HASWELL-NEXT: retq # sched: [2:1.00]
1479;
1480; SKYLAKE-LABEL: test_pblendw:
1481; SKYLAKE: # BB#0:
1482; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001483; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00]
1484; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001485;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001486; SKX-LABEL: test_pblendw:
1487; SKX: # BB#0:
1488; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001489; SKX-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00]
1490; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001491;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001492; ZNVER1-LABEL: test_pblendw:
1493; ZNVER1: # BB#0:
1494; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33]
1495; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [9:0.50]
1496; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 16..31 select from the second shuffle operand. The first mask
; takes lanes 2-4 and 10-12 from %a1 (the same pattern in each group of eight
; lanes); the second takes every even lane from the loaded vector.
1497 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 28, i32 13, i32 14, i32 15>
1498 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1499 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
1500 ret <16 x i16> %3
1501}
1502
; Scheduling test for the 128-bit vpbroadcastb instruction. Element 0 of %a0
; is splatted (register form), element 0 of a vector loaded from %a1 is
; splatted (memory form), and the two splats are added. The check lines below
; expect two vpbroadcastb instructions and a vpaddb; the ZNVER1 checks list
; the memory-form broadcast first.
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001503define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) {
1504; GENERIC-LABEL: test_pbroadcastb:
1505; GENERIC: # BB#0:
1506; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
1507; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
1508; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1509; GENERIC-NEXT: retq # sched: [1:1.00]
1510;
1511; HASWELL-LABEL: test_pbroadcastb:
1512; HASWELL: # BB#0:
1513; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1514; HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:1.00]
1515; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1516; HASWELL-NEXT: retq # sched: [2:1.00]
1517;
1518; SKYLAKE-LABEL: test_pbroadcastb:
1519; SKYLAKE: # BB#0:
1520; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001521; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1522; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1523; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001524;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001525; SKX-LABEL: test_pbroadcastb:
1526; SKX: # BB#0:
1527; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001528; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1529; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1530; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001531;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001532; ZNVER1-LABEL: test_pbroadcastb:
1533; ZNVER1: # BB#0:
1534; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00]
1535; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25]
1536; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1537; ZNVER1-NEXT: retq # sched: [1:0.50]
; A shufflevector with an all-zero mask copies element 0 into every lane.
1538 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
1539 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
1540 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
1541 %4 = add <16 x i8> %1, %3
1542 ret <16 x i8> %4
1543}
1544
; Scheduling test for the 256-bit vpbroadcastb instruction. Element 0 of %a0
; is splatted (register form), element 0 of a vector loaded from %a1 is
; splatted (memory form), and the two splats are added. The check lines below
; expect two vpbroadcastb instructions and a vpaddb; the ZNVER1 checks list
; the memory-form broadcast first.
1545define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) {
1546; GENERIC-LABEL: test_pbroadcastb_ymm:
1547; GENERIC: # BB#0:
1548; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
1549; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
1550; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1551; GENERIC-NEXT: retq # sched: [1:1.00]
1552;
1553; HASWELL-LABEL: test_pbroadcastb_ymm:
1554; HASWELL: # BB#0:
1555; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1556; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:1.00]
1557; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1558; HASWELL-NEXT: retq # sched: [2:1.00]
1559;
1560; SKYLAKE-LABEL: test_pbroadcastb_ymm:
1561; SKYLAKE: # BB#0:
1562; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001563; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1564; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1565; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001566;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001567; SKX-LABEL: test_pbroadcastb_ymm:
1568; SKX: # BB#0:
1569; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001570; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1571; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1572; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001573;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001574; ZNVER1-LABEL: test_pbroadcastb_ymm:
1575; ZNVER1: # BB#0:
1576; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00]
1577; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25]
1578; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1579; ZNVER1-NEXT: retq # sched: [1:0.50]
; A shufflevector with an all-zero mask copies element 0 into every lane.
1580 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer
1581 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
1582 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer
1583 %4 = add <32 x i8> %1, %3
1584 ret <32 x i8> %4
1585}
1586
; Scheduling test for the 128-bit vpbroadcastd instruction. Element 0 of %a0
; is splatted (register form), element 0 of a vector loaded from %a1 is
; splatted (memory form), and the two splats are added. Most CPU models are
; expected to emit two vpbroadcastd instructions and a vpaddd; the SKX checks
; instead expect the memory splat folded into vpaddd as a {1to4} operand.
1587define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) {
1588; GENERIC-LABEL: test_pbroadcastd:
1589; GENERIC: # BB#0:
1590; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1591; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
1592; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1593; GENERIC-NEXT: retq # sched: [1:1.00]
1594;
1595; HASWELL-LABEL: test_pbroadcastd:
1596; HASWELL: # BB#0:
1597; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1598; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [1:0.50]
1599; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1600; HASWELL-NEXT: retq # sched: [2:1.00]
1601;
1602; SKYLAKE-LABEL: test_pbroadcastd:
1603; SKYLAKE: # BB#0:
1604; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001605; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
1606; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1607; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001608;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001609; SKX-LABEL: test_pbroadcastd:
1610; SKX: # BB#0:
1611; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001612; SKX-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
1613; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001614;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001615; ZNVER1-LABEL: test_pbroadcastd:
1616; ZNVER1: # BB#0:
1617; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50]
1618; ZNVER1-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.25]
1619; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1620; ZNVER1-NEXT: retq # sched: [1:0.50]
; A shufflevector with an all-zero mask copies element 0 into every lane.
1621 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
1622 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
1623 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
1624 %4 = add <4 x i32> %1, %3
1625 ret <4 x i32> %4
1626}
1627
; Scheduling test for the 256-bit vpbroadcastd instruction. Element 0 of %a0
; is splatted (register form), element 0 of a vector loaded from %a1 is
; splatted (memory form), and the two splats are added. Most CPU models are
; expected to emit two vpbroadcastd instructions and a vpaddd; the SKX checks
; instead expect the memory splat folded into vpaddd as a {1to8} operand.
1628define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) {
1629; GENERIC-LABEL: test_pbroadcastd_ymm:
1630; GENERIC: # BB#0:
1631; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
1632; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
1633; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1634; GENERIC-NEXT: retq # sched: [1:1.00]
1635;
1636; HASWELL-LABEL: test_pbroadcastd_ymm:
1637; HASWELL: # BB#0:
1638; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1639; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [1:0.50]
1640; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1641; HASWELL-NEXT: retq # sched: [2:1.00]
1642;
1643; SKYLAKE-LABEL: test_pbroadcastd_ymm:
1644; SKYLAKE: # BB#0:
1645; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001646; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
1647; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1648; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001649;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001650; SKX-LABEL: test_pbroadcastd_ymm:
1651; SKX: # BB#0:
1652; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001653; SKX-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
1654; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001655;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001656; ZNVER1-LABEL: test_pbroadcastd_ymm:
1657; ZNVER1: # BB#0:
1658; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50]
1659; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25]
1660; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1661; ZNVER1-NEXT: retq # sched: [1:0.50]
; A shufflevector with an all-zero mask copies element 0 into every lane.
1662 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
1663 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
1664 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer
1665 %4 = add <8 x i32> %1, %3
1666 ret <8 x i32> %4
1667}
1668
; Scheduling test for the 128-bit vpbroadcastq instruction. Element 0 of %a0
; is splatted (register form), element 0 of a vector loaded from %a1 is
; splatted (memory form), and the two splats are added. Most CPU models are
; expected to emit two vpbroadcastq instructions and a vpaddq; the SKX checks
; instead expect the memory splat folded into vpaddq as a {1to2} operand.
1669define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) {
1670; GENERIC-LABEL: test_pbroadcastq:
1671; GENERIC: # BB#0:
1672; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1673; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
1674; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1675; GENERIC-NEXT: retq # sched: [1:1.00]
1676;
1677; HASWELL-LABEL: test_pbroadcastq:
1678; HASWELL: # BB#0:
1679; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1680; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [1:0.50]
1681; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1682; HASWELL-NEXT: retq # sched: [2:1.00]
1683;
1684; SKYLAKE-LABEL: test_pbroadcastq:
1685; SKYLAKE: # BB#0:
1686; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001687; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
1688; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1689; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001690;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001691; SKX-LABEL: test_pbroadcastq:
1692; SKX: # BB#0:
1693; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001694; SKX-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0 # sched: [7:0.50]
1695; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001696;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001697; ZNVER1-LABEL: test_pbroadcastq:
1698; ZNVER1: # BB#0:
1699; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50]
1700; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25]
1701; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1702; ZNVER1-NEXT: retq # sched: [1:0.50]
; A shufflevector with an all-zero mask copies element 0 into every lane.
1703 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
1704 %2 = load <2 x i64>, <2 x i64> *%a1, align 16
1705 %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
1706 %4 = add <2 x i64> %1, %3
1707 ret <2 x i64> %4
1708}
1709
; Scheduling test for the 256-bit vpbroadcastq instruction. Element 0 of %a0
; is splatted (register form), element 0 of a vector loaded from %a1 is
; splatted (memory form), and the two splats are added. Most CPU models are
; expected to emit two vpbroadcastq instructions and a vpaddq; the SKX checks
; instead expect the memory splat folded into vpaddq as a {1to4} operand.
1710define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) {
1711; GENERIC-LABEL: test_pbroadcastq_ymm:
1712; GENERIC: # BB#0:
1713; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
1714; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
1715; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1716; GENERIC-NEXT: retq # sched: [1:1.00]
1717;
1718; HASWELL-LABEL: test_pbroadcastq_ymm:
1719; HASWELL: # BB#0:
1720; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
1721; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [1:0.50]
1722; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1723; HASWELL-NEXT: retq # sched: [2:1.00]
1724;
1725; SKYLAKE-LABEL: test_pbroadcastq_ymm:
1726; SKYLAKE: # BB#0:
1727; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001728; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
1729; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1730; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001731;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001732; SKX-LABEL: test_pbroadcastq_ymm:
1733; SKX: # BB#0:
1734; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001735; SKX-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0 # sched: [8:0.50]
1736; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001737;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001738; ZNVER1-LABEL: test_pbroadcastq_ymm:
1739; ZNVER1: # BB#0:
1740; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50]
1741; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25]
1742; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1743; ZNVER1-NEXT: retq # sched: [1:0.50]
; A shufflevector with an all-zero mask copies element 0 into every lane.
1744 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
1745 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
1746 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer
1747 %4 = add <4 x i64> %1, %3
1748 ret <4 x i64> %4
1749}
1750
; Scheduling test for the 128-bit vpbroadcastw instruction. Element 0 of %a0
; is splatted (register form), element 0 of a vector loaded from %a1 is
; splatted (memory form), and the two splats are added. The check lines below
; expect two vpbroadcastw instructions and a vpaddw; the ZNVER1 checks list
; the memory-form broadcast first.
1751define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) {
1752; GENERIC-LABEL: test_pbroadcastw:
1753; GENERIC: # BB#0:
1754; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
1755; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
1756; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1757; GENERIC-NEXT: retq # sched: [1:1.00]
1758;
1759; HASWELL-LABEL: test_pbroadcastw:
1760; HASWELL: # BB#0:
1761; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
1762; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:1.00]
1763; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1764; HASWELL-NEXT: retq # sched: [2:1.00]
1765;
1766; SKYLAKE-LABEL: test_pbroadcastw:
1767; SKYLAKE: # BB#0:
1768; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001769; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
1770; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1771; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001772;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001773; SKX-LABEL: test_pbroadcastw:
1774; SKX: # BB#0:
1775; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001776; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
1777; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1778; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001779;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001780; ZNVER1-LABEL: test_pbroadcastw:
1781; ZNVER1: # BB#0:
1782; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00]
1783; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25]
1784; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1785; ZNVER1-NEXT: retq # sched: [1:0.50]
; A shufflevector with an all-zero mask copies element 0 into every lane.
1786 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
1787 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
1788 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
1789 %4 = add <8 x i16> %1, %3
1790 ret <8 x i16> %4
1791}
1792
; 256-bit vpbroadcastw scheduling test. Same shape as the 128-bit variant:
; element 0 of the register argument and of a vector loaded from %a1 are
; splatted across <16 x i16> and summed, so the expectations cover both the
; xmm-to-ymm register form and the memory form of the broadcast.
1793define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) {
1794; GENERIC-LABEL: test_pbroadcastw_ymm:
1795; GENERIC: # BB#0:
1796; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
1797; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
1798; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1799; GENERIC-NEXT: retq # sched: [1:1.00]
1800;
1801; HASWELL-LABEL: test_pbroadcastw_ymm:
1802; HASWELL: # BB#0:
1803; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
1804; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:1.00]
1805; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1806; HASWELL-NEXT: retq # sched: [2:1.00]
1807;
1808; SKYLAKE-LABEL: test_pbroadcastw_ymm:
1809; SKYLAKE: # BB#0:
1810; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001811; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
1812; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1813; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001814;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001815; SKX-LABEL: test_pbroadcastw_ymm:
1816; SKX: # BB#0:
1817; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001818; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
1819; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1820; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001821;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001822; ZNVER1-LABEL: test_pbroadcastw_ymm:
1823; ZNVER1: # BB#0:
1824; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00]
1825; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25]
1826; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1827; ZNVER1-NEXT: retq # sched: [1:0.50]
1828 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
1829 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
1830 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer
1831 %4 = add <16 x i16> %1, %3
1832 ret <16 x i16> %4
1833}
1834
; vpcmpeqb (256-bit) scheduling test. %a0 is compared for equality with %a1,
; the i1 result is sign-extended back to <32 x i8>, and that mask is compared
; again with a vector loaded from %a2, so a register form and a memory form
; are both expected. In the skx expectations the compares write mask register
; %k0 and are followed by vpmovm2b; those vpmovm2b lines carry no sched
; annotation.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001835define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1836; GENERIC-LABEL: test_pcmpeqb:
1837; GENERIC: # BB#0:
1838; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1839; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1840; GENERIC-NEXT: retq # sched: [1:1.00]
1841;
1842; HASWELL-LABEL: test_pcmpeqb:
1843; HASWELL: # BB#0:
1844; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1845; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1846; HASWELL-NEXT: retq # sched: [2:1.00]
1847;
1848; SKYLAKE-LABEL: test_pcmpeqb:
1849; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001850; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1851; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1852; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001853;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001854; SKX-LABEL: test_pcmpeqb:
1855; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001856; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001857; SKX-NEXT: vpmovm2b %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00001858; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001859; SKX-NEXT: vpmovm2b %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00001860; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001861;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001862; ZNVER1-LABEL: test_pcmpeqb:
1863; ZNVER1: # BB#0:
1864; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1865; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1866; ZNVER1-NEXT: retq # sched: [1:0.50]
1867 %1 = icmp eq <32 x i8> %a0, %a1
1868 %2 = sext <32 x i1> %1 to <32 x i8>
1869 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
1870 %4 = icmp eq <32 x i8> %2, %3
1871 %5 = sext <32 x i1> %4 to <32 x i8>
1872 ret <32 x i8> %5
1873}
1874
; vpcmpeqd (256-bit) scheduling test. An equality compare of the two register
; arguments is sign-extended to <8 x i32> and then compared with a vector
; loaded from %a2, giving one register-form and one memory-form compare. The
; skx expectations compare into %k0 and convert back with vpmovm2d, which is
; listed without a sched annotation.
1875define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
1876; GENERIC-LABEL: test_pcmpeqd:
1877; GENERIC: # BB#0:
1878; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1879; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1880; GENERIC-NEXT: retq # sched: [1:1.00]
1881;
1882; HASWELL-LABEL: test_pcmpeqd:
1883; HASWELL: # BB#0:
1884; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1885; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1886; HASWELL-NEXT: retq # sched: [2:1.00]
1887;
1888; SKYLAKE-LABEL: test_pcmpeqd:
1889; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001890; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1891; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1892; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001893;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001894; SKX-LABEL: test_pcmpeqd:
1895; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001896; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001897; SKX-NEXT: vpmovm2d %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00001898; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001899; SKX-NEXT: vpmovm2d %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00001900; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001901;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001902; ZNVER1-LABEL: test_pcmpeqd:
1903; ZNVER1: # BB#0:
1904; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1905; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1906; ZNVER1-NEXT: retq # sched: [1:0.50]
1907 %1 = icmp eq <8 x i32> %a0, %a1
1908 %2 = sext <8 x i1> %1 to <8 x i32>
1909 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
1910 %4 = icmp eq <8 x i32> %2, %3
1911 %5 = sext <8 x i1> %4 to <8 x i32>
1912 ret <8 x i32> %5
1913}
1914
; vpcmpeqq (256-bit) scheduling test. An equality compare of the two register
; arguments is sign-extended to <4 x i64> and then compared with a vector
; loaded from %a2, giving one register-form and one memory-form compare. The
; skx expectations compare into %k0 and convert back with vpmovm2q, which is
; listed without a sched annotation.
1915define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
1916; GENERIC-LABEL: test_pcmpeqq:
1917; GENERIC: # BB#0:
1918; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1919; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1920; GENERIC-NEXT: retq # sched: [1:1.00]
1921;
1922; HASWELL-LABEL: test_pcmpeqq:
1923; HASWELL: # BB#0:
1924; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1925; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1926; HASWELL-NEXT: retq # sched: [2:1.00]
1927;
1928; SKYLAKE-LABEL: test_pcmpeqq:
1929; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001930; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1931; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1932; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001933;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001934; SKX-LABEL: test_pcmpeqq:
1935; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001936; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001937; SKX-NEXT: vpmovm2q %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00001938; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001939; SKX-NEXT: vpmovm2q %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00001940; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001941;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001942; ZNVER1-LABEL: test_pcmpeqq:
1943; ZNVER1: # BB#0:
1944; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1945; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1946; ZNVER1-NEXT: retq # sched: [1:0.50]
1947 %1 = icmp eq <4 x i64> %a0, %a1
1948 %2 = sext <4 x i1> %1 to <4 x i64>
1949 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
1950 %4 = icmp eq <4 x i64> %2, %3
1951 %5 = sext <4 x i1> %4 to <4 x i64>
1952 ret <4 x i64> %5
1953}
1954
; vpcmpeqw (256-bit) scheduling test. An equality compare of the two register
; arguments is sign-extended to <16 x i16> and then compared with a vector
; loaded from %a2, giving one register-form and one memory-form compare. The
; skx expectations compare into %k0 and convert back with vpmovm2w, which is
; listed without a sched annotation.
1955define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1956; GENERIC-LABEL: test_pcmpeqw:
1957; GENERIC: # BB#0:
1958; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1959; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1960; GENERIC-NEXT: retq # sched: [1:1.00]
1961;
1962; HASWELL-LABEL: test_pcmpeqw:
1963; HASWELL: # BB#0:
1964; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1965; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1966; HASWELL-NEXT: retq # sched: [2:1.00]
1967;
1968; SKYLAKE-LABEL: test_pcmpeqw:
1969; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001970; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1971; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1972; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001973;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001974; SKX-LABEL: test_pcmpeqw:
1975; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001976; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001977; SKX-NEXT: vpmovm2w %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00001978; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001979; SKX-NEXT: vpmovm2w %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00001980; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001981;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001982; ZNVER1-LABEL: test_pcmpeqw:
1983; ZNVER1: # BB#0:
1984; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1985; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1986; ZNVER1-NEXT: retq # sched: [1:0.50]
1987 %1 = icmp eq <16 x i16> %a0, %a1
1988 %2 = sext <16 x i1> %1 to <16 x i16>
1989 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
1990 %4 = icmp eq <16 x i16> %2, %3
1991 %5 = sext <16 x i1> %4 to <16 x i16>
1992 ret <16 x i16> %5
1993}
1994
; vpcmpgtb (256-bit) scheduling test. A signed greater-than compare of the two
; register arguments is sign-extended to <32 x i8> and then compared (again
; sgt) with a vector loaded from %a2, giving one register-form and one
; memory-form compare. The skx expectations compare into %k0 and convert back
; with vpmovm2b, which is listed without a sched annotation.
1995define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1996; GENERIC-LABEL: test_pcmpgtb:
1997; GENERIC: # BB#0:
1998; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1999; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2000; GENERIC-NEXT: retq # sched: [1:1.00]
2001;
2002; HASWELL-LABEL: test_pcmpgtb:
2003; HASWELL: # BB#0:
2004; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2005; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2006; HASWELL-NEXT: retq # sched: [2:1.00]
2007;
2008; SKYLAKE-LABEL: test_pcmpgtb:
2009; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002010; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2011; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2012; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002013;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002014; SKX-LABEL: test_pcmpgtb:
2015; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002016; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002017; SKX-NEXT: vpmovm2b %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002018; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002019; SKX-NEXT: vpmovm2b %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002020; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002021;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002022; ZNVER1-LABEL: test_pcmpgtb:
2023; ZNVER1: # BB#0:
2024; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2025; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2026; ZNVER1-NEXT: retq # sched: [1:0.50]
2027 %1 = icmp sgt <32 x i8> %a0, %a1
2028 %2 = sext <32 x i1> %1 to <32 x i8>
2029 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
2030 %4 = icmp sgt <32 x i8> %2, %3
2031 %5 = sext <32 x i1> %4 to <32 x i8>
2032 ret <32 x i8> %5
2033}
2034
; vpcmpgtd (256-bit) scheduling test. A signed greater-than compare of the two
; register arguments is sign-extended to <8 x i32> and then compared (again
; sgt) with a vector loaded from %a2, giving one register-form and one
; memory-form compare. The skx expectations compare into %k0 and convert back
; with vpmovm2d, which is listed without a sched annotation.
2035define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2036; GENERIC-LABEL: test_pcmpgtd:
2037; GENERIC: # BB#0:
2038; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2039; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2040; GENERIC-NEXT: retq # sched: [1:1.00]
2041;
2042; HASWELL-LABEL: test_pcmpgtd:
2043; HASWELL: # BB#0:
2044; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2045; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2046; HASWELL-NEXT: retq # sched: [2:1.00]
2047;
2048; SKYLAKE-LABEL: test_pcmpgtd:
2049; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002050; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2051; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2052; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002053;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002054; SKX-LABEL: test_pcmpgtd:
2055; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002056; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002057; SKX-NEXT: vpmovm2d %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002058; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002059; SKX-NEXT: vpmovm2d %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002060; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002061;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002062; ZNVER1-LABEL: test_pcmpgtd:
2063; ZNVER1: # BB#0:
2064; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2065; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2066; ZNVER1-NEXT: retq # sched: [1:0.50]
2067 %1 = icmp sgt <8 x i32> %a0, %a1
2068 %2 = sext <8 x i1> %1 to <8 x i32>
2069 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
2070 %4 = icmp sgt <8 x i32> %2, %3
2071 %5 = sext <8 x i1> %4 to <8 x i32>
2072 ret <8 x i32> %5
2073}
2074
; vpcmpgtq (256-bit) scheduling test. A signed greater-than compare of the two
; register arguments is sign-extended to <4 x i64> and then compared (again
; sgt) with a vector loaded from %a2, giving one register-form and one
; memory-form compare. The skx expectations compare into %k0 and convert back
; with vpmovm2q, which is listed without a sched annotation.
2075define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2076; GENERIC-LABEL: test_pcmpgtq:
2077; GENERIC: # BB#0:
2078; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2079; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2080; GENERIC-NEXT: retq # sched: [1:1.00]
2081;
2082; HASWELL-LABEL: test_pcmpgtq:
2083; HASWELL: # BB#0:
2084; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2085; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2086; HASWELL-NEXT: retq # sched: [2:1.00]
2087;
2088; SKYLAKE-LABEL: test_pcmpgtq:
2089; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002090; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002091; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2092; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002093;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002094; SKX-LABEL: test_pcmpgtq:
2095; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002096; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002097; SKX-NEXT: vpmovm2q %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002098; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002099; SKX-NEXT: vpmovm2q %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002100; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002101;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002102; ZNVER1-LABEL: test_pcmpgtq:
2103; ZNVER1: # BB#0:
2104; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2105; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
2106; ZNVER1-NEXT: retq # sched: [1:0.50]
2107 %1 = icmp sgt <4 x i64> %a0, %a1
2108 %2 = sext <4 x i1> %1 to <4 x i64>
2109 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
2110 %4 = icmp sgt <4 x i64> %2, %3
2111 %5 = sext <4 x i1> %4 to <4 x i64>
2112 ret <4 x i64> %5
2113}
2114
; vpcmpgtw (256-bit) scheduling test. A signed greater-than compare of the two
; register arguments is sign-extended to <16 x i16> and then compared (again
; sgt) with a vector loaded from %a2, giving one register-form and one
; memory-form compare. The skx expectations compare into %k0 and convert back
; with vpmovm2w, which is listed without a sched annotation.
2115define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2116; GENERIC-LABEL: test_pcmpgtw:
2117; GENERIC: # BB#0:
2118; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2119; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2120; GENERIC-NEXT: retq # sched: [1:1.00]
2121;
2122; HASWELL-LABEL: test_pcmpgtw:
2123; HASWELL: # BB#0:
2124; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2125; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2126; HASWELL-NEXT: retq # sched: [2:1.00]
2127;
2128; SKYLAKE-LABEL: test_pcmpgtw:
2129; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002130; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2131; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2132; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002133;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002134; SKX-LABEL: test_pcmpgtw:
2135; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002136; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002137; SKX-NEXT: vpmovm2w %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002138; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002139; SKX-NEXT: vpmovm2w %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002140; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002141;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002142; ZNVER1-LABEL: test_pcmpgtw:
2143; ZNVER1: # BB#0:
2144; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2145; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2146; ZNVER1-NEXT: retq # sched: [1:0.50]
2147 %1 = icmp sgt <16 x i16> %a0, %a1
2148 %2 = sext <16 x i1> %1 to <16 x i16>
2149 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2150 %4 = icmp sgt <16 x i16> %2, %3
2151 %5 = sext <16 x i1> %4 to <16 x i16>
2152 ret <16 x i16> %5
2153}
2154
; vperm2i128 scheduling test. The shuffle mask <2,3,4,5> selects the upper half
; of %a0 followed by the lower half of the second operand; it is applied once
; with %a1 in a register and once with a vector loaded from %a2, and the two
; results are added so both shuffles stay live. The expectations show the
; register form and the folded-load form of vperm2i128.
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002155define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2156; GENERIC-LABEL: test_perm2i128:
2157; GENERIC: # BB#0:
2158; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
2159; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00]
2160; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2161; GENERIC-NEXT: retq # sched: [1:1.00]
2162;
2163; HASWELL-LABEL: test_perm2i128:
2164; HASWELL: # BB#0:
2165; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2166; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00]
2167; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2168; HASWELL-NEXT: retq # sched: [2:1.00]
2169;
2170; SKYLAKE-LABEL: test_perm2i128:
2171; SKYLAKE: # BB#0:
2172; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002173; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2174; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2175; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002176;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002177; SKX-LABEL: test_perm2i128:
2178; SKX: # BB#0:
2179; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002180; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2181; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2182; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002183;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002184; ZNVER1-LABEL: test_perm2i128:
2185; ZNVER1: # BB#0:
2186; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25]
2187; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50]
2188; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2189; ZNVER1-NEXT: retq # sched: [1:0.50]
2190 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2191 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
2192 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2193 %4 = add <4 x i64> %1, %3
2194 ret <4 x i64> %4
2195}
2196
; vpermd scheduling test. The llvm.x86.avx2.permd intrinsic is called twice
; with %a0 as its second argument: once with %a1 from a register and once with
; a vector loaded from %a2. The two results are added so both calls stay live,
; and the expectations show the register form and the folded-load form of
; vpermd.
2197define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2198; GENERIC-LABEL: test_permd:
2199; GENERIC: # BB#0:
2200; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2201; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2202; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2203; GENERIC-NEXT: retq # sched: [1:1.00]
2204;
2205; HASWELL-LABEL: test_permd:
2206; HASWELL: # BB#0:
2207; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2208; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2209; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2210; HASWELL-NEXT: retq # sched: [2:1.00]
2211;
2212; SKYLAKE-LABEL: test_permd:
2213; SKYLAKE: # BB#0:
2214; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002215; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2216; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2217; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002218;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002219; SKX-LABEL: test_permd:
2220; SKX: # BB#0:
2221; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002222; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2223; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2224; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002225;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002226; ZNVER1-LABEL: test_permd:
2227; ZNVER1: # BB#0:
2228; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25]
2229; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
2230; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2231; ZNVER1-NEXT: retq # sched: [1:0.50]
2232 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
2233 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
2234 %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0)
2235 %4 = add <8 x i32> %1, %3
2236 ret <8 x i32> %4
2237}
2238declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
2239
; vpermpd scheduling test. A single-source shuffle with mask <3,2,2,3> is
; applied to the register argument and one with mask <0,2,2,3> to a vector
; loaded from %a1; the results are combined with fadd so both shuffles stay
; live. The expectations show the register form and the folded-load form of
; vpermpd.
; NOTE(review): the znver1 expectations list 100 and 107 as the first sched
; field for vpermpd, far larger than the other CPUs' values - presumably
; placeholder numbers in that scheduler model; confirm before relying on them.
2240define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
2241; GENERIC-LABEL: test_permpd:
2242; GENERIC: # BB#0:
2243; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2244; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2245; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2246; GENERIC-NEXT: retq # sched: [1:1.00]
2247;
2248; HASWELL-LABEL: test_permpd:
2249; HASWELL: # BB#0:
2250; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2251; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00]
2252; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2253; HASWELL-NEXT: retq # sched: [2:1.00]
2254;
2255; SKYLAKE-LABEL: test_permpd:
2256; SKYLAKE: # BB#0:
2257; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002258; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002259; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002260; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002261;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002262; SKX-LABEL: test_permpd:
2263; SKX: # BB#0:
2264; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002265; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2266; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2267; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002268;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002269; ZNVER1-LABEL: test_permpd:
2270; ZNVER1: # BB#0:
2271; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50]
2272; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25]
2273; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2274; ZNVER1-NEXT: retq # sched: [1:0.50]
2275 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
2276 %2 = load <4 x double>, <4 x double> *%a1, align 32
2277 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2278 %4 = fadd <4 x double> %1, %3
2279 ret <4 x double> %4
2280}
2281
; vpermps scheduling test. The llvm.x86.avx2.permps intrinsic is called twice
; with the <8 x i32> argument %a0 as its second operand: once with %a1 from a
; register and once with a vector loaded from %a2. The results are combined
; with fadd so both calls stay live, and the expectations show the register
; form and the folded-load form of vpermps.
; NOTE(review): the znver1 expectations list 100 and 107 as the first sched
; field for vpermps, far larger than the other CPUs' values - presumably
; placeholder numbers in that scheduler model; confirm before relying on them.
2282define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) {
2283; GENERIC-LABEL: test_permps:
2284; GENERIC: # BB#0:
2285; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2286; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2287; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2288; GENERIC-NEXT: retq # sched: [1:1.00]
2289;
2290; HASWELL-LABEL: test_permps:
2291; HASWELL: # BB#0:
2292; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2293; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2294; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2295; HASWELL-NEXT: retq # sched: [2:1.00]
2296;
2297; SKYLAKE-LABEL: test_permps:
2298; SKYLAKE: # BB#0:
2299; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002300; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002301; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002302; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002303;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002304; SKX-LABEL: test_permps:
2305; SKX: # BB#0:
2306; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002307; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2308; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
2309; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002310;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002311; ZNVER1-LABEL: test_permps:
2312; ZNVER1: # BB#0:
2313; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25]
2314; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50]
2315; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2316; ZNVER1-NEXT: retq # sched: [1:0.50]
2317 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
2318 %2 = load <8 x float>, <8 x float> *%a2, align 32
2319 %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0)
2320 %4 = fadd <8 x float> %1, %3
2321 ret <8 x float> %4
2322}
2323declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
2324
; vpermq scheduling test. A single-source shuffle with mask <3,2,2,3> is
; applied to the register argument and one with mask <0,2,2,3> to a vector
; loaded from %a1; the results are added so both shuffles stay live. The
; expectations show the register form and the folded-load form of vpermq.
2325define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
2326; GENERIC-LABEL: test_permq:
2327; GENERIC: # BB#0:
2328; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2329; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2330; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2331; GENERIC-NEXT: retq # sched: [1:1.00]
2332;
2333; HASWELL-LABEL: test_permq:
2334; HASWELL: # BB#0:
2335; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2336; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00]
2337; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2338; HASWELL-NEXT: retq # sched: [2:1.00]
2339;
2340; SKYLAKE-LABEL: test_permq:
2341; SKYLAKE: # BB#0:
2342; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002343; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2344; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2345; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002346;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002347; SKX-LABEL: test_permq:
2348; SKX: # BB#0:
2349; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002350; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2351; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2352; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002353;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002354; ZNVER1-LABEL: test_permq:
2355; ZNVER1: # BB#0:
2356; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50]
2357; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25]
2358; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2359; ZNVER1-NEXT: retq # sched: [1:0.50]
2360 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
2361 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2362 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2363 %4 = add <4 x i64> %1, %3
2364 ret <4 x i64> %4
2365}
2366
; 128-bit vpgatherdd scheduling test. A single call to
; llvm.x86.avx2.gather.d.d with a trailing i8 argument of 2 is expected to
; become one vpgatherdd whose memory operand is (%rdi,%xmm1,2). In the
; expectations the generic gather line has no sched annotation at all, and the
; haswell and znver1 lines show '?' as the second sched field.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002367define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
2368; GENERIC-LABEL: test_pgatherdd:
2369; GENERIC: # BB#0:
2370; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
2371; GENERIC-NEXT: retq # sched: [1:1.00]
2372;
2373; HASWELL-LABEL: test_pgatherdd:
2374; HASWELL: # BB#0:
2375; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2376; HASWELL-NEXT: retq # sched: [2:1.00]
2377;
2378; SKYLAKE-LABEL: test_pgatherdd:
2379; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002380; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2381; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002382;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002383; SKX-LABEL: test_pgatherdd:
2384; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002385; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2386; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002387;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002388; ZNVER1-LABEL: test_pgatherdd:
2389; ZNVER1: # BB#0:
2390; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2391; ZNVER1-NEXT: retq # sched: [1:0.50]
2392 %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2)
2393 ret <4 x i32> %1
2394}
2395declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly
2396
; Schedule test for the ymm form of vpgatherdd: one call to
; @llvm.x86.avx2.gather.d.d.256 on <8 x i32> operands, with the trailing i8 2
; showing up as the scale in (%rdi,%ymm1,2).
; As with the xmm variant, the generic expectation has no sched annotation on
; the gather, and haswell/znver1 show [1:?] / [100:?] -- presumably placeholder
; values; TODO confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2397define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
2398; GENERIC-LABEL: test_pgatherdd_ymm:
2399; GENERIC: # BB#0:
2400; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
2401; GENERIC-NEXT: retq # sched: [1:1.00]
2402;
2403; HASWELL-LABEL: test_pgatherdd_ymm:
2404; HASWELL: # BB#0:
2405; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?]
2406; HASWELL-NEXT: retq # sched: [2:1.00]
2407;
2408; SKYLAKE-LABEL: test_pgatherdd_ymm:
2409; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002410; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2411; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002412;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002413; SKX-LABEL: test_pgatherdd_ymm:
2414; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002415; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2416; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002417;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002418; ZNVER1-LABEL: test_pgatherdd_ymm:
2419; ZNVER1: # BB#0:
2420; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
2421; ZNVER1-NEXT: retq # sched: [1:0.50]
2422 %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2)
2423 ret <8 x i32> %1
2424}
2425declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly
2426
; Schedule test for the xmm form of vpgatherdq: one call to
; @llvm.x86.avx2.gather.d.q, which takes a <4 x i32> third argument and
; produces <2 x i64>. The trailing i8 2 appears as the scale in
; (%rdi,%xmm1,2).
; The generic expectation has no sched annotation on the gather; haswell and
; znver1 show [1:?] / [100:?] -- presumably placeholders; TODO confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2427define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
2428; GENERIC-LABEL: test_pgatherdq:
2429; GENERIC: # BB#0:
2430; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
2431; GENERIC-NEXT: retq # sched: [1:1.00]
2432;
2433; HASWELL-LABEL: test_pgatherdq:
2434; HASWELL: # BB#0:
2435; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2436; HASWELL-NEXT: retq # sched: [2:1.00]
2437;
2438; SKYLAKE-LABEL: test_pgatherdq:
2439; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002440; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2441; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002442;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002443; SKX-LABEL: test_pgatherdq:
2444; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002445; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2446; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002447;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002448; ZNVER1-LABEL: test_pgatherdq:
2449; ZNVER1: # BB#0:
2450; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2451; ZNVER1-NEXT: retq # sched: [1:0.50]
2452 %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2)
2453 ret <2 x i64> %1
2454}
2455declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly
2456
; Schedule test for the ymm form of vpgatherdq: one call to
; @llvm.x86.avx2.gather.d.q.256, which takes a <4 x i32> third argument and
; produces <4 x i64>. In the expected assembly the register inside the
; addressing mode stays an xmm register, (%rdi,%xmm1,2), while the other two
; operands are ymm registers; the trailing i8 2 is the scale.
; The generic expectation has no sched annotation on the gather; haswell and
; znver1 show [1:?] / [100:?] -- presumably placeholders; TODO confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2457define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
2458; GENERIC-LABEL: test_pgatherdq_ymm:
2459; GENERIC: # BB#0:
2460; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
2461; GENERIC-NEXT: retq # sched: [1:1.00]
2462;
2463; HASWELL-LABEL: test_pgatherdq_ymm:
2464; HASWELL: # BB#0:
2465; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [1:?]
2466; HASWELL-NEXT: retq # sched: [2:1.00]
2467;
2468; SKYLAKE-LABEL: test_pgatherdq_ymm:
2469; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002470; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2471; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002472;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002473; SKX-LABEL: test_pgatherdq_ymm:
2474; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002475; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2476; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002477;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002478; ZNVER1-LABEL: test_pgatherdq_ymm:
2479; ZNVER1: # BB#0:
2480; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:?]
2481; ZNVER1-NEXT: retq # sched: [1:0.50]
2482 %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2)
2483 ret <4 x i64> %1
2484}
2485declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly
2486
; Schedule test for the xmm form of vpgatherqd: one call to
; @llvm.x86.avx2.gather.q.d, which takes a <2 x i64> third argument and
; produces <4 x i32>. The trailing i8 2 appears as the scale in
; (%rdi,%xmm1,2).
; The generic expectation has no sched annotation on the gather; haswell and
; znver1 show [1:?] / [100:?] -- presumably placeholders; TODO confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2487define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
2488; GENERIC-LABEL: test_pgatherqd:
2489; GENERIC: # BB#0:
2490; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
2491; GENERIC-NEXT: retq # sched: [1:1.00]
2492;
2493; HASWELL-LABEL: test_pgatherqd:
2494; HASWELL: # BB#0:
2495; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2496; HASWELL-NEXT: retq # sched: [2:1.00]
2497;
2498; SKYLAKE-LABEL: test_pgatherqd:
2499; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002500; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2501; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002502;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002503; SKX-LABEL: test_pgatherqd:
2504; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002505; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2506; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002507;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002508; ZNVER1-LABEL: test_pgatherqd:
2509; ZNVER1: # BB#0:
2510; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2511; ZNVER1-NEXT: retq # sched: [1:0.50]
2512 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2)
2513 ret <4 x i32> %1
2514}
2515declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly
2516
; Schedule test for the ymm-addressed form of vpgatherqd: one call to
; @llvm.x86.avx2.gather.q.d.256, which takes a <4 x i64> third argument but
; still produces <4 x i32>. In the expected assembly only the register inside
; the addressing mode is a ymm register, (%rdi,%ymm1,2); the other operands are
; xmm registers.
; Unlike the sibling gather tests, every expected sequence here has a
; vzeroupper before retq. Its sched annotation is absent for the generic run
; and [100:?] for znver1, same as the gather itself -- presumably placeholder
; values; TODO confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2517define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
2518; GENERIC-LABEL: test_pgatherqd_ymm:
2519; GENERIC: # BB#0:
2520; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
2521; GENERIC-NEXT: vzeroupper
2522; GENERIC-NEXT: retq # sched: [1:1.00]
2523;
2524; HASWELL-LABEL: test_pgatherqd_ymm:
2525; HASWELL: # BB#0:
2526; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [1:?]
2527; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
2528; HASWELL-NEXT: retq # sched: [2:1.00]
2529;
2530; SKYLAKE-LABEL: test_pgatherqd_ymm:
2531; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002532; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002533; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002534; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002535;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002536; SKX-LABEL: test_pgatherqd_ymm:
2537; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002538; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002539; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002540; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002541;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002542; ZNVER1-LABEL: test_pgatherqd_ymm:
2543; ZNVER1: # BB#0:
2544; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:?]
2545; ZNVER1-NEXT: vzeroupper # sched: [100:?]
2546; ZNVER1-NEXT: retq # sched: [1:0.50]
2547 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2)
2548 ret <4 x i32> %1
2549}
2550declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly
2551
; Schedule test for the xmm form of vpgatherqq: one call to
; @llvm.x86.avx2.gather.q.q on <2 x i64> operands, with the trailing i8 2
; showing up as the scale in (%rdi,%xmm1,2).
; The generic expectation has no sched annotation on the gather; haswell and
; znver1 show [1:?] / [100:?] -- presumably placeholders; TODO confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2552define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
2553; GENERIC-LABEL: test_pgatherqq:
2554; GENERIC: # BB#0:
2555; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
2556; GENERIC-NEXT: retq # sched: [1:1.00]
2557;
2558; HASWELL-LABEL: test_pgatherqq:
2559; HASWELL: # BB#0:
2560; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2561; HASWELL-NEXT: retq # sched: [2:1.00]
2562;
2563; SKYLAKE-LABEL: test_pgatherqq:
2564; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002565; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2566; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002567;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002568; SKX-LABEL: test_pgatherqq:
2569; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002570; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2571; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002572;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002573; ZNVER1-LABEL: test_pgatherqq:
2574; ZNVER1: # BB#0:
2575; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2576; ZNVER1-NEXT: retq # sched: [1:0.50]
2577 %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
2578 ret <2 x i64> %1
2579}
2580declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly
2581
; Schedule test for the ymm form of vpgatherqq: one call to
; @llvm.x86.avx2.gather.q.q.256 on <4 x i64> operands, with the trailing i8 2
; showing up as the scale in (%rdi,%ymm1,2).
; The generic expectation has no sched annotation on the gather; haswell and
; znver1 show [1:?] / [100:?] -- presumably placeholders; TODO confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2582define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
2583; GENERIC-LABEL: test_pgatherqq_ymm:
2584; GENERIC: # BB#0:
2585; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
2586; GENERIC-NEXT: retq # sched: [1:1.00]
2587;
2588; HASWELL-LABEL: test_pgatherqq_ymm:
2589; HASWELL: # BB#0:
2590; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?]
2591; HASWELL-NEXT: retq # sched: [2:1.00]
2592;
2593; SKYLAKE-LABEL: test_pgatherqq_ymm:
2594; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002595; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2596; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002597;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002598; SKX-LABEL: test_pgatherqq_ymm:
2599; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002600; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2601; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002602;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002603; ZNVER1-LABEL: test_pgatherqq_ymm:
2604; ZNVER1: # BB#0:
2605; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
2606; ZNVER1-NEXT: retq # sched: [1:0.50]
2607 %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
2608 ret <4 x i64> %1
2609}
2610declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly
2611
; Schedule test for ymm vphaddd. The intrinsic @llvm.x86.avx2.phadd.d is called
; twice: first on %a0/%a1, then on that result and a vector loaded from %a2, so
; the expected assembly contains both the register form and the (%rdi) memory
; form of the instruction.
; NOTE(review): the haswell expectations give the memory form the same
; [3:2.00] as the register form, whereas skylake/skx differ ([3:2.00] vs
; [10:2.00]) -- looks like the load is not reflected in that model; confirm.
; znver1 shows [100:?] for both forms, presumably a placeholder.
; Assertions are tool-generated; regenerate instead of hand-editing.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002612define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2613; GENERIC-LABEL: test_phaddd:
2614; GENERIC: # BB#0:
2615; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2616; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
2617; GENERIC-NEXT: retq # sched: [1:1.00]
2618;
2619; HASWELL-LABEL: test_phaddd:
2620; HASWELL: # BB#0:
2621; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
2622; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
2623; HASWELL-NEXT: retq # sched: [2:1.00]
2624;
2625; SKYLAKE-LABEL: test_phaddd:
2626; SKYLAKE: # BB#0:
2627; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002628; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2629; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002630;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002631; SKX-LABEL: test_phaddd:
2632; SKX: # BB#0:
2633; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002634; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2635; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002636;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002637; ZNVER1-LABEL: test_phaddd:
2638; ZNVER1: # BB#0:
2639; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:?]
2640; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:?]
2641; ZNVER1-NEXT: retq # sched: [1:0.50]
2642 %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
2643 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
2644 %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2)
2645 ret <8 x i32> %3
2646}
2647declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
2648
; Schedule test for ymm vphaddsw. @llvm.x86.avx2.phadd.sw is called twice:
; first on %a0/%a1, then on that result and a vector loaded from %a2, so both
; the register form and the (%rdi) memory form are expected.
; NOTE(review): haswell lists [3:2.00] for both forms while skylake/skx list
; [3:2.00] vs [10:2.00] -- looks like the load is not reflected in the haswell
; numbers; confirm. znver1 shows [100:?] for both, presumably a placeholder.
; Assertions are tool-generated; regenerate instead of hand-editing.
2649define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2650; GENERIC-LABEL: test_phaddsw:
2651; GENERIC: # BB#0:
2652; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2653; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2654; GENERIC-NEXT: retq # sched: [1:1.00]
2655;
2656; HASWELL-LABEL: test_phaddsw:
2657; HASWELL: # BB#0:
2658; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
2659; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
2660; HASWELL-NEXT: retq # sched: [2:1.00]
2661;
2662; SKYLAKE-LABEL: test_phaddsw:
2663; SKYLAKE: # BB#0:
2664; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002665; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2666; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002667;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002668; SKX-LABEL: test_phaddsw:
2669; SKX: # BB#0:
2670; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002671; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2672; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002673;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002674; ZNVER1-LABEL: test_phaddsw:
2675; ZNVER1: # BB#0:
2676; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
2677; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
2678; ZNVER1-NEXT: retq # sched: [1:0.50]
2679 %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
2680 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
2681 %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2)
2682 ret <16 x i16> %3
2683}
2684declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
2685
; Schedule test for ymm vphaddw. @llvm.x86.avx2.phadd.w is called twice: first
; on %a0/%a1, then on that result and a vector loaded from %a2, so both the
; register form and the (%rdi) memory form are expected.
; NOTE(review): haswell lists [3:2.00] for both forms while skylake/skx list
; [3:2.00] vs [10:2.00] -- looks like the load is not reflected in the haswell
; numbers; confirm. znver1 shows [100:?] for both, presumably a placeholder.
; Assertions are tool-generated; regenerate instead of hand-editing.
2686define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2687; GENERIC-LABEL: test_phaddw:
2688; GENERIC: # BB#0:
2689; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2690; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
2691; GENERIC-NEXT: retq # sched: [1:1.00]
2692;
2693; HASWELL-LABEL: test_phaddw:
2694; HASWELL: # BB#0:
2695; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
2696; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
2697; HASWELL-NEXT: retq # sched: [2:1.00]
2698;
2699; SKYLAKE-LABEL: test_phaddw:
2700; SKYLAKE: # BB#0:
2701; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002702; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2703; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002704;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002705; SKX-LABEL: test_phaddw:
2706; SKX: # BB#0:
2707; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002708; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2709; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002710;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002711; ZNVER1-LABEL: test_phaddw:
2712; ZNVER1: # BB#0:
2713; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:?]
2714; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:?]
2715; ZNVER1-NEXT: retq # sched: [1:0.50]
2716 %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
2717 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
2718 %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2)
2719 ret <16 x i16> %3
2720}
2721declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
2722
; Schedule test for ymm vphsubd. @llvm.x86.avx2.phsub.d is called twice: first
; on %a0/%a1, then on that result and a vector loaded from %a2, so both the
; register form and the (%rdi) memory form are expected.
; NOTE(review): haswell lists [3:2.00] for both forms while skylake/skx list
; [3:2.00] vs [10:2.00] -- looks like the load is not reflected in the haswell
; numbers; confirm. znver1 shows [100:?] for both, presumably a placeholder.
; Assertions are tool-generated; regenerate instead of hand-editing.
2723define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2724; GENERIC-LABEL: test_phsubd:
2725; GENERIC: # BB#0:
2726; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2727; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
2728; GENERIC-NEXT: retq # sched: [1:1.00]
2729;
2730; HASWELL-LABEL: test_phsubd:
2731; HASWELL: # BB#0:
2732; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
2733; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
2734; HASWELL-NEXT: retq # sched: [2:1.00]
2735;
2736; SKYLAKE-LABEL: test_phsubd:
2737; SKYLAKE: # BB#0:
2738; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002739; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2740; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002741;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002742; SKX-LABEL: test_phsubd:
2743; SKX: # BB#0:
2744; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002745; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2746; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002747;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002748; ZNVER1-LABEL: test_phsubd:
2749; ZNVER1: # BB#0:
2750; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:?]
2751; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:?]
2752; ZNVER1-NEXT: retq # sched: [1:0.50]
2753 %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
2754 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
2755 %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2)
2756 ret <8 x i32> %3
2757}
2758declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
2759
; Schedule test for ymm vphsubsw. @llvm.x86.avx2.phsub.sw is called twice:
; first on %a0/%a1, then on that result and a vector loaded from %a2, so both
; the register form and the (%rdi) memory form are expected.
; NOTE(review): haswell lists [3:2.00] for both forms while skylake/skx list
; [3:2.00] vs [10:2.00] -- looks like the load is not reflected in the haswell
; numbers; confirm. znver1 shows [100:?] for both, presumably a placeholder.
; Assertions are tool-generated; regenerate instead of hand-editing.
2760define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2761; GENERIC-LABEL: test_phsubsw:
2762; GENERIC: # BB#0:
2763; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2764; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2765; GENERIC-NEXT: retq # sched: [1:1.00]
2766;
2767; HASWELL-LABEL: test_phsubsw:
2768; HASWELL: # BB#0:
2769; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
2770; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
2771; HASWELL-NEXT: retq # sched: [2:1.00]
2772;
2773; SKYLAKE-LABEL: test_phsubsw:
2774; SKYLAKE: # BB#0:
2775; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002776; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2777; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002778;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002779; SKX-LABEL: test_phsubsw:
2780; SKX: # BB#0:
2781; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002782; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2783; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002784;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002785; ZNVER1-LABEL: test_phsubsw:
2786; ZNVER1: # BB#0:
2787; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
2788; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
2789; ZNVER1-NEXT: retq # sched: [1:0.50]
2790 %1 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
2791 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
2792 %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2)
2793 ret <16 x i16> %3
2794}
2795declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
2796
; Schedule test for ymm vphsubw. @llvm.x86.avx2.phsub.w is called twice: first
; on %a0/%a1, then on that result and a vector loaded from %a2, so both the
; register form and the (%rdi) memory form are expected.
; NOTE(review): haswell lists [3:2.00] for both forms while skylake/skx list
; [3:2.00] vs [10:2.00] -- looks like the load is not reflected in the haswell
; numbers; confirm. znver1 shows [100:?] for both, presumably a placeholder.
; Assertions are tool-generated; regenerate instead of hand-editing.
2797define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2798; GENERIC-LABEL: test_phsubw:
2799; GENERIC: # BB#0:
2800; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2801; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
2802; GENERIC-NEXT: retq # sched: [1:1.00]
2803;
2804; HASWELL-LABEL: test_phsubw:
2805; HASWELL: # BB#0:
2806; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
2807; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
2808; HASWELL-NEXT: retq # sched: [2:1.00]
2809;
2810; SKYLAKE-LABEL: test_phsubw:
2811; SKYLAKE: # BB#0:
2812; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002813; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2814; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002815;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002816; SKX-LABEL: test_phsubw:
2817; SKX: # BB#0:
2818; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002819; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
2820; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002821;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002822; ZNVER1-LABEL: test_phsubw:
2823; ZNVER1: # BB#0:
2824; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:?]
2825; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:?]
2826; ZNVER1-NEXT: retq # sched: [1:0.50]
2827 %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
2828 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
2829 %3 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2)
2830 ret <16 x i16> %3
2831}
2832declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
2833
; Schedule test for ymm vpmaddubsw. @llvm.x86.avx2.pmadd.ub.sw is called twice:
; first on %a0/%a1, then on the first result and a vector loaded from %a2. The
; intrinsic returns <16 x i16> but takes <32 x i8>, so the first result is
; bitcast back to <32 x i8> before being fed to the second call. Both the
; register form and the (%rdi) memory form are expected.
; NOTE(review): haswell lists [5:1.00] for both forms while the other CPUs
; give the memory form a larger first number -- looks like the load is not
; reflected in the haswell numbers; confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2834define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2835; GENERIC-LABEL: test_pmaddubsw:
2836; GENERIC: # BB#0:
2837; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2838; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2839; GENERIC-NEXT: retq # sched: [1:1.00]
2840;
2841; HASWELL-LABEL: test_pmaddubsw:
2842; HASWELL: # BB#0:
2843; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2844; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2845; HASWELL-NEXT: retq # sched: [2:1.00]
2846;
2847; SKYLAKE-LABEL: test_pmaddubsw:
2848; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002849; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002850; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2851; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002852;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002853; SKX-LABEL: test_pmaddubsw:
2854; SKX: # BB#0:
2855; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00002856; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2857; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002858;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002859; ZNVER1-LABEL: test_pmaddubsw:
2860; ZNVER1: # BB#0:
2861; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
2862; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
2863; ZNVER1-NEXT: retq # sched: [1:0.50]
2864 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
2865 %2 = bitcast <16 x i16> %1 to <32 x i8>
2866 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
2867 %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x i8> %3)
2868 ret <16 x i16> %4
2869}
2870declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
2871
; Schedule test for ymm vpmaddwd. @llvm.x86.avx2.pmadd.wd is called twice:
; first on %a0/%a1, then on the first result and a vector loaded from %a2. The
; intrinsic returns <8 x i32> but takes <16 x i16>, so the first result is
; bitcast back to <16 x i16> before being fed to the second call. Both the
; register form and the (%rdi) memory form are expected.
; NOTE(review): haswell lists [5:1.00] for both forms while the other CPUs
; give the memory form a larger first number -- looks like the load is not
; reflected in the haswell numbers; confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2872define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2873; GENERIC-LABEL: test_pmaddwd:
2874; GENERIC: # BB#0:
2875; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2876; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2877; GENERIC-NEXT: retq # sched: [1:1.00]
2878;
2879; HASWELL-LABEL: test_pmaddwd:
2880; HASWELL: # BB#0:
2881; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2882; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2883; HASWELL-NEXT: retq # sched: [2:1.00]
2884;
2885; SKYLAKE-LABEL: test_pmaddwd:
2886; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002887; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002888; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2889; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002890;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002891; SKX-LABEL: test_pmaddwd:
2892; SKX: # BB#0:
2893; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00002894; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2895; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002896;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002897; ZNVER1-LABEL: test_pmaddwd:
2898; ZNVER1: # BB#0:
2899; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
2900; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
2901; ZNVER1-NEXT: retq # sched: [1:0.50]
2902 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
2903 %2 = bitcast <8 x i32> %1 to <16 x i16>
2904 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2905 %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3)
2906 ret <8 x i32> %4
2907}
2908declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
2909
; Schedule test for the xmm forms of vpmaskmovd. The IR calls
; @llvm.x86.avx2.maskload.d and then @llvm.x86.avx2.maskstore.d on the same
; pointer %a0, passing %a1 to both, and returns the loaded value. The expected
; assembly is therefore the load form into %xmm2, the store form to (%rdi), and
; a vmovdqa that moves the loaded value into %xmm0 for the return.
; The generic expectation has no sched annotation on either vpmaskmovd, and
; znver1 shows [100:?] for both -- presumably placeholder values; TODO confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002910define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
2911; GENERIC-LABEL: test_pmaskmovd:
2912; GENERIC: # BB#0:
2913; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
2914; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
2915; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
2916; GENERIC-NEXT: retq # sched: [1:1.00]
2917;
2918; HASWELL-LABEL: test_pmaskmovd:
2919; HASWELL: # BB#0:
2920; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
2921; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
2922; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
2923; HASWELL-NEXT: retq # sched: [2:1.00]
2924;
2925; SKYLAKE-LABEL: test_pmaskmovd:
2926; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002927; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
2928; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002929; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002930; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002931;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002932; SKX-LABEL: test_pmaskmovd:
2933; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002934; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
2935; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002936; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00002937; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002938;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002939; ZNVER1-LABEL: test_pmaskmovd:
2940; ZNVER1: # BB#0:
2941; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:?]
2942; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:?]
2943; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
2944; ZNVER1-NEXT: retq # sched: [1:0.50]
2945 %1 = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1)
2946 call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
2947 ret <4 x i32> %1
2948}
2949declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
2950declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
2951
; Schedule test for the ymm forms of vpmaskmovd. The IR calls
; @llvm.x86.avx2.maskload.d.256 and then @llvm.x86.avx2.maskstore.d.256 on the
; same pointer %a0, passing %a1 to both, and returns the loaded value. The
; expected assembly is the load form into %ymm2, the store form to (%rdi), and
; a vmovdqa that moves the loaded value into %ymm0 for the return.
; The generic expectation has no sched annotation on either vpmaskmovd, and
; znver1 shows [100:?] for both -- presumably placeholder values; TODO confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2952define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
2953; GENERIC-LABEL: test_pmaskmovd_ymm:
2954; GENERIC: # BB#0:
2955; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2
2956; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi)
2957; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
2958; GENERIC-NEXT: retq # sched: [1:1.00]
2959;
2960; HASWELL-LABEL: test_pmaskmovd_ymm:
2961; HASWELL: # BB#0:
2962; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
2963; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
2964; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
2965; HASWELL-NEXT: retq # sched: [2:1.00]
2966;
2967; SKYLAKE-LABEL: test_pmaskmovd_ymm:
2968; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002969; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
2970; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002971; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002972; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002973;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002974; SKX-LABEL: test_pmaskmovd_ymm:
2975; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002976; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
2977; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002978; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00002979; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002980;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002981; ZNVER1-LABEL: test_pmaskmovd_ymm:
2982; ZNVER1: # BB#0:
2983; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:?]
2984; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:?]
2985; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
2986; ZNVER1-NEXT: retq # sched: [1:0.50]
2987 %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1)
2988 call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
2989 ret <8 x i32> %1
2990}
2991declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
2992declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
2993
; Schedule test for the xmm forms of vpmaskmovq. The IR calls
; @llvm.x86.avx2.maskload.q and then @llvm.x86.avx2.maskstore.q on the same
; pointer %a0, passing %a1 to both, and returns the loaded value. The expected
; assembly is the load form into %xmm2, the store form to (%rdi), and a
; vmovdqa that moves the loaded value into %xmm0 for the return.
; The generic expectation has no sched annotation on either vpmaskmovq.
; NOTE(review): for znver1 the load form here is [8:1.00] while the store form
; is [100:?], and test_pmaskmovd's load form is [100:?] -- the model looks
; only partially populated for these instructions; confirm.
; Assertions are tool-generated; regenerate instead of hand-editing.
2994define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
2995; GENERIC-LABEL: test_pmaskmovq:
2996; GENERIC: # BB#0:
2997; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2
2998; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi)
2999; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
3000; GENERIC-NEXT: retq # sched: [1:1.00]
3001;
3002; HASWELL-LABEL: test_pmaskmovq:
3003; HASWELL: # BB#0:
3004; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
3005; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
3006; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3007; HASWELL-NEXT: retq # sched: [2:1.00]
3008;
3009; SKYLAKE-LABEL: test_pmaskmovq:
3010; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003011; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3012; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003013; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003014; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003015;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003016; SKX-LABEL: test_pmaskmovq:
3017; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003018; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3019; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003020; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00003021; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003022;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003023; ZNVER1-LABEL: test_pmaskmovq:
3024; ZNVER1: # BB#0:
3025; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
3026; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:?]
3027; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3028; ZNVER1-NEXT: retq # sched: [1:0.50]
3029 %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1)
3030 call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
3031 ret <2 x i64> %1
3032}
3033declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
3034declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
3035
; Scheduling test for the 256-bit vpmaskmovq forms. Same shape as the 128-bit
; variant: a qword maskload and maskstore through the same pointer %a0, both
; taking %a1 as their second operand, with the loaded vector returned.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3036define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
3037; GENERIC-LABEL: test_pmaskmovq_ymm:
3038; GENERIC: # BB#0:
3039; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2
3040; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi)
3041; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3042; GENERIC-NEXT: retq # sched: [1:1.00]
3043;
3044; HASWELL-LABEL: test_pmaskmovq_ymm:
3045; HASWELL: # BB#0:
3046; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
3047; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
3048; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3049; HASWELL-NEXT: retq # sched: [2:1.00]
3050;
3051; SKYLAKE-LABEL: test_pmaskmovq_ymm:
3052; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003053; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3054; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003055; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003056; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003057;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003058; SKX-LABEL: test_pmaskmovq_ymm:
3059; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003060; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3061; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003062; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00003063; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003064;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003065; ZNVER1-LABEL: test_pmaskmovq_ymm:
3066; ZNVER1: # BB#0:
3067; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50]
3068; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:?]
3069; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3070; ZNVER1-NEXT: retq # sched: [1:0.50]
; Masked load through %a0; the result is the function's return value.
3071 %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1)
; Masked store of %a2 through the same pointer.
3072 call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
3073 ret <4 x i64> %1
3074}
; Declarations of the two intrinsics called above.
3075declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
3076declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
3077
; Scheduling test for vpmaxsb on 256-bit vectors. The pmaxs.b intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003078define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3079; GENERIC-LABEL: test_pmaxsb:
3080; GENERIC: # BB#0:
3081; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3082; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3083; GENERIC-NEXT: retq # sched: [1:1.00]
3084;
3085; HASWELL-LABEL: test_pmaxsb:
3086; HASWELL: # BB#0:
3087; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3088; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3089; HASWELL-NEXT: retq # sched: [2:1.00]
3090;
3091; SKYLAKE-LABEL: test_pmaxsb:
3092; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003093; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3094; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3095; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003096;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003097; SKX-LABEL: test_pmaxsb:
3098; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003099; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3100; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3101; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003102;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003103; ZNVER1-LABEL: test_pmaxsb:
3104; ZNVER1: # BB#0:
3105; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3106; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3107; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3108 %1 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
; Second use takes its right-hand operand from memory.
3109 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3110 %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2)
3111 ret <32 x i8> %3
3112}
; Declaration of the intrinsic called above.
3113declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
3114
; Scheduling test for vpmaxsd on 256-bit vectors. The pmaxs.d intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3115define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3116; GENERIC-LABEL: test_pmaxsd:
3117; GENERIC: # BB#0:
3118; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3119; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3120; GENERIC-NEXT: retq # sched: [1:1.00]
3121;
3122; HASWELL-LABEL: test_pmaxsd:
3123; HASWELL: # BB#0:
3124; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3125; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3126; HASWELL-NEXT: retq # sched: [2:1.00]
3127;
3128; SKYLAKE-LABEL: test_pmaxsd:
3129; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003130; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3131; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3132; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003133;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003134; SKX-LABEL: test_pmaxsd:
3135; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003136; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3137; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3138; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003139;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003140; ZNVER1-LABEL: test_pmaxsd:
3141; ZNVER1: # BB#0:
3142; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3143; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3144; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3145 %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
; Second use takes its right-hand operand from memory.
3146 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3147 %3 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2)
3148 ret <8 x i32> %3
3149}
; Declaration of the intrinsic called above.
3150declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
3151
; Scheduling test for vpmaxsw on 256-bit vectors. The pmaxs.w intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3152define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3153; GENERIC-LABEL: test_pmaxsw:
3154; GENERIC: # BB#0:
3155; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3156; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3157; GENERIC-NEXT: retq # sched: [1:1.00]
3158;
3159; HASWELL-LABEL: test_pmaxsw:
3160; HASWELL: # BB#0:
3161; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3162; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3163; HASWELL-NEXT: retq # sched: [2:1.00]
3164;
3165; SKYLAKE-LABEL: test_pmaxsw:
3166; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003167; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3168; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3169; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003170;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003171; SKX-LABEL: test_pmaxsw:
3172; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003173; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3174; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3175; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003176;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003177; ZNVER1-LABEL: test_pmaxsw:
3178; ZNVER1: # BB#0:
3179; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3180; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3181; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3182 %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
; Second use takes its right-hand operand from memory.
3183 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3184 %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2)
3185 ret <16 x i16> %3
3186}
; Declaration of the intrinsic called above.
3187declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
3188
; Scheduling test for vpmaxub on 256-bit vectors. The pmaxu.b intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3189define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3190; GENERIC-LABEL: test_pmaxub:
3191; GENERIC: # BB#0:
3192; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3193; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3194; GENERIC-NEXT: retq # sched: [1:1.00]
3195;
3196; HASWELL-LABEL: test_pmaxub:
3197; HASWELL: # BB#0:
3198; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3199; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3200; HASWELL-NEXT: retq # sched: [2:1.00]
3201;
3202; SKYLAKE-LABEL: test_pmaxub:
3203; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003204; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3205; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3206; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003207;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003208; SKX-LABEL: test_pmaxub:
3209; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003210; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3211; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3212; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003213;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003214; ZNVER1-LABEL: test_pmaxub:
3215; ZNVER1: # BB#0:
3216; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3217; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3218; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3219 %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
; Second use takes its right-hand operand from memory.
3220 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3221 %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2)
3222 ret <32 x i8> %3
3223}
; Declaration of the intrinsic called above.
3224declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
3225
; Scheduling test for vpmaxud on 256-bit vectors. The pmaxu.d intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3226define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3227; GENERIC-LABEL: test_pmaxud:
3228; GENERIC: # BB#0:
3229; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3230; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3231; GENERIC-NEXT: retq # sched: [1:1.00]
3232;
3233; HASWELL-LABEL: test_pmaxud:
3234; HASWELL: # BB#0:
3235; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3236; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3237; HASWELL-NEXT: retq # sched: [2:1.00]
3238;
3239; SKYLAKE-LABEL: test_pmaxud:
3240; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003241; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3242; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3243; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003244;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003245; SKX-LABEL: test_pmaxud:
3246; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003247; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3248; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3249; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003250;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003251; ZNVER1-LABEL: test_pmaxud:
3252; ZNVER1: # BB#0:
3253; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3254; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3255; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3256 %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
; Second use takes its right-hand operand from memory.
3257 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3258 %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2)
3259 ret <8 x i32> %3
3260}
; Declaration of the intrinsic called above.
3261declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
3262
; Scheduling test for vpmaxuw on 256-bit vectors. The pmaxu.w intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3263define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3264; GENERIC-LABEL: test_pmaxuw:
3265; GENERIC: # BB#0:
3266; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3267; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3268; GENERIC-NEXT: retq # sched: [1:1.00]
3269;
3270; HASWELL-LABEL: test_pmaxuw:
3271; HASWELL: # BB#0:
3272; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3273; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3274; HASWELL-NEXT: retq # sched: [2:1.00]
3275;
3276; SKYLAKE-LABEL: test_pmaxuw:
3277; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003278; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3279; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3280; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003281;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003282; SKX-LABEL: test_pmaxuw:
3283; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003284; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3285; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3286; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003287;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003288; ZNVER1-LABEL: test_pmaxuw:
3289; ZNVER1: # BB#0:
3290; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3291; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3292; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3293 %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
; Second use takes its right-hand operand from memory.
3294 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3295 %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2)
3296 ret <16 x i16> %3
3297}
; Declaration of the intrinsic called above.
3298declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
3299
; Scheduling test for vpminsb on 256-bit vectors. The pmins.b intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3300define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3301; GENERIC-LABEL: test_pminsb:
3302; GENERIC: # BB#0:
3303; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3304; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3305; GENERIC-NEXT: retq # sched: [1:1.00]
3306;
3307; HASWELL-LABEL: test_pminsb:
3308; HASWELL: # BB#0:
3309; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3310; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3311; HASWELL-NEXT: retq # sched: [2:1.00]
3312;
3313; SKYLAKE-LABEL: test_pminsb:
3314; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003315; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3316; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3317; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003318;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003319; SKX-LABEL: test_pminsb:
3320; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003321; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3322; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3323; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003324;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003325; ZNVER1-LABEL: test_pminsb:
3326; ZNVER1: # BB#0:
3327; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3328; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3329; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3330 %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
; Second use takes its right-hand operand from memory.
3331 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3332 %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2)
3333 ret <32 x i8> %3
3334}
; Declaration of the intrinsic called above.
3335declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
3336
; Scheduling test for vpminsd on 256-bit vectors. The pmins.d intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3337define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3338; GENERIC-LABEL: test_pminsd:
3339; GENERIC: # BB#0:
3340; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3341; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3342; GENERIC-NEXT: retq # sched: [1:1.00]
3343;
3344; HASWELL-LABEL: test_pminsd:
3345; HASWELL: # BB#0:
3346; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3347; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3348; HASWELL-NEXT: retq # sched: [2:1.00]
3349;
3350; SKYLAKE-LABEL: test_pminsd:
3351; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003352; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3353; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3354; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003355;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003356; SKX-LABEL: test_pminsd:
3357; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003358; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3359; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3360; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003361;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003362; ZNVER1-LABEL: test_pminsd:
3363; ZNVER1: # BB#0:
3364; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3365; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3366; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3367 %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
; Second use takes its right-hand operand from memory.
3368 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3369 %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2)
3370 ret <8 x i32> %3
3371}
; Declaration of the intrinsic called above.
3372declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
3373
; Scheduling test for vpminsw on 256-bit vectors. The pmins.w intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3374define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3375; GENERIC-LABEL: test_pminsw:
3376; GENERIC: # BB#0:
3377; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3378; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3379; GENERIC-NEXT: retq # sched: [1:1.00]
3380;
3381; HASWELL-LABEL: test_pminsw:
3382; HASWELL: # BB#0:
3383; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3384; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3385; HASWELL-NEXT: retq # sched: [2:1.00]
3386;
3387; SKYLAKE-LABEL: test_pminsw:
3388; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003389; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3390; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3391; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003392;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003393; SKX-LABEL: test_pminsw:
3394; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003395; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3396; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3397; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003398;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003399; ZNVER1-LABEL: test_pminsw:
3400; ZNVER1: # BB#0:
3401; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3402; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3403; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3404 %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
; Second use takes its right-hand operand from memory.
3405 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3406 %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2)
3407 ret <16 x i16> %3
3408}
; Declaration of the intrinsic called above.
3409declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
3410
; Scheduling test for vpminub on 256-bit vectors. The pminu.b intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3411define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3412; GENERIC-LABEL: test_pminub:
3413; GENERIC: # BB#0:
3414; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3415; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3416; GENERIC-NEXT: retq # sched: [1:1.00]
3417;
3418; HASWELL-LABEL: test_pminub:
3419; HASWELL: # BB#0:
3420; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3421; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3422; HASWELL-NEXT: retq # sched: [2:1.00]
3423;
3424; SKYLAKE-LABEL: test_pminub:
3425; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003426; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3427; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3428; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003429;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003430; SKX-LABEL: test_pminub:
3431; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003432; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3433; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3434; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003435;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003436; ZNVER1-LABEL: test_pminub:
3437; ZNVER1: # BB#0:
3438; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3439; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3440; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3441 %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
; Second use takes its right-hand operand from memory.
3442 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3443 %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2)
3444 ret <32 x i8> %3
3445}
; Declaration of the intrinsic called above.
3446declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
3447
; Scheduling test for vpminud on 256-bit vectors. The pminu.d intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3448define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3449; GENERIC-LABEL: test_pminud:
3450; GENERIC: # BB#0:
3451; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3452; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3453; GENERIC-NEXT: retq # sched: [1:1.00]
3454;
3455; HASWELL-LABEL: test_pminud:
3456; HASWELL: # BB#0:
3457; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3458; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3459; HASWELL-NEXT: retq # sched: [2:1.00]
3460;
3461; SKYLAKE-LABEL: test_pminud:
3462; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003463; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3464; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3465; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003466;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003467; SKX-LABEL: test_pminud:
3468; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003469; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3470; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3471; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003472;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003473; ZNVER1-LABEL: test_pminud:
3474; ZNVER1: # BB#0:
3475; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3476; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3477; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3478 %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
; Second use takes its right-hand operand from memory.
3479 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3480 %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2)
3481 ret <8 x i32> %3
3482}
; Declaration of the intrinsic called above.
3483declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
3484
; Scheduling test for vpminuw on 256-bit vectors. The pminu.w intrinsic is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2, so the expected assembly contains both the
; register-register and the register-memory form of the instruction.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3485define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3486; GENERIC-LABEL: test_pminuw:
3487; GENERIC: # BB#0:
3488; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3489; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3490; GENERIC-NEXT: retq # sched: [1:1.00]
3491;
3492; HASWELL-LABEL: test_pminuw:
3493; HASWELL: # BB#0:
3494; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3495; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3496; HASWELL-NEXT: retq # sched: [2:1.00]
3497;
3498; SKYLAKE-LABEL: test_pminuw:
3499; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003500; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3501; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3502; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003503;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003504; SKX-LABEL: test_pminuw:
3505; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003506; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3507; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3508; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003509;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003510; ZNVER1-LABEL: test_pminuw:
3511; ZNVER1: # BB#0:
3512; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3513; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3514; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register use of the intrinsic.
3515 %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
; Second use takes its right-hand operand from memory.
3516 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3517 %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2)
3518 ret <16 x i16> %3
3519}
; Declaration of the intrinsic called above.
3520declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
3521
; Scheduling test for vpmovmskb with a 256-bit source. The body is a single
; call to the pmovmskb intrinsic whose i32 result is returned; every expected
; sequence is vpmovmskb from %ymm0 into %eax, followed by vzeroupper and retq.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003522define i32 @test_pmovmskb(<32 x i8> %a0) {
3523; GENERIC-LABEL: test_pmovmskb:
3524; GENERIC: # BB#0:
3525; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [1:1.00]
3526; GENERIC-NEXT: vzeroupper
3527; GENERIC-NEXT: retq # sched: [1:1.00]
3528;
3529; HASWELL-LABEL: test_pmovmskb:
3530; HASWELL: # BB#0:
3531; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
3532; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
3533; HASWELL-NEXT: retq # sched: [2:1.00]
3534;
3535; SKYLAKE-LABEL: test_pmovmskb:
3536; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003537; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003538; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003539; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003540;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003541; SKX-LABEL: test_pmovmskb:
3542; SKX: # BB#0:
3543; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
3544; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003545; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003546;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003547; ZNVER1-LABEL: test_pmovmskb:
3548; ZNVER1: # BB#0:
3549; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
3550; ZNVER1-NEXT: vzeroupper # sched: [100:?]
3551; ZNVER1-NEXT: retq # sched: [1:0.50]
3552 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
3553 ret i32 %1
3554}
; Declaration of the intrinsic called above.
3555declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
3556
; Scheduling test for vpmovsxbd with a 256-bit destination. No intrinsic is
; used: the pattern is written as a shufflevector selecting elements 0-7 of a
; <16 x i8> followed by a sext to <8 x i32>, once on the register argument and
; once on a vector loaded from %a1. The two results are added, so the expected
; assembly has the register and memory forms of vpmovsxbd plus a vpaddd. The
; last assertion group expects the memory form first; the others expect the
; register form first.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003557define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) {
3558; GENERIC-LABEL: test_pmovsxbd:
3559; GENERIC: # BB#0:
3560; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
3561; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [5:1.00]
3562; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3563; GENERIC-NEXT: retq # sched: [1:1.00]
3564;
3565; HASWELL-LABEL: test_pmovsxbd:
3566; HASWELL: # BB#0:
3567; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
3568; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00]
3569; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3570; HASWELL-NEXT: retq # sched: [2:1.00]
3571;
3572; SKYLAKE-LABEL: test_pmovsxbd:
3573; SKYLAKE: # BB#0:
3574; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003575; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
3576; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3577; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003578;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003579; SKX-LABEL: test_pmovsxbd:
3580; SKX: # BB#0:
3581; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003582; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
3583; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3584; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003585;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003586; ZNVER1-LABEL: test_pmovsxbd:
3587; ZNVER1: # BB#0:
3588; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50]
3589; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.25]
3590; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3591; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operand: take elements 0-7 and sign-extend each to i32.
3592 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3593 %2 = sext <8 x i8> %1 to <8 x i32>
; Memory operand: same extract-and-extend applied to the loaded vector.
3594 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
3595 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3596 %5 = sext <8 x i8> %4 to <8 x i32>
; Combine both results so neither extension is dead.
3597 %6 = add <8 x i32> %2, %5
3598 ret <8 x i32> %6
3599}
3600
; Scheduling test for vpmovsxbq with a 256-bit destination. No intrinsic is
; used: the pattern is written as a shufflevector selecting elements 0-3 of a
; <16 x i8> followed by a sext to <4 x i64>, once on the register argument and
; once on a vector loaded from %a1. The two results are added, so the expected
; assembly has the register and memory forms of vpmovsxbq plus a vpaddq. The
; last assertion group expects the memory form first; the others expect the
; register form first.
; The per-CPU assertion groups are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
3601define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) {
3602; GENERIC-LABEL: test_pmovsxbq:
3603; GENERIC: # BB#0:
3604; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
3605; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [5:1.00]
3606; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3607; GENERIC-NEXT: retq # sched: [1:1.00]
3608;
3609; HASWELL-LABEL: test_pmovsxbq:
3610; HASWELL: # BB#0:
3611; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
3612; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00]
3613; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3614; HASWELL-NEXT: retq # sched: [2:1.00]
3615;
3616; SKYLAKE-LABEL: test_pmovsxbq:
3617; SKYLAKE: # BB#0:
3618; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003619; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
3620; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3621; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003622;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003623; SKX-LABEL: test_pmovsxbq:
3624; SKX: # BB#0:
3625; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003626; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
3627; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3628; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003629;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003630; ZNVER1-LABEL: test_pmovsxbq:
3631; ZNVER1: # BB#0:
3632; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50]
3633; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50]
3634; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3635; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operand: take elements 0-3 and sign-extend each to i64.
3636 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3637 %2 = sext <4 x i8> %1 to <4 x i64>
; Memory operand: same extract-and-extend applied to the loaded vector.
3638 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
3639 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3640 %5 = sext <4 x i8> %4 to <4 x i64>
; Combine both results so neither extension is dead.
3641 %6 = add <4 x i64> %2, %5
3642 ret <4 x i64> %6
3643}
3644
; test_pmovsxbw: sign-extends all sixteen i8 lanes of %a0 and of the
; <16 x i8> loaded from %a1 to <16 x i16>, then adds the two results. The
; autogenerated check lines expect the register and memory forms of vpmovsxbw
; followed by vpaddw, each with its per-CPU "sched" annotation; the znver1
; expectations list the memory form first.
3645define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) {
3646; GENERIC-LABEL: test_pmovsxbw:
3647; GENERIC: # BB#0:
3648; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
3649; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [5:1.00]
3650; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3651; GENERIC-NEXT: retq # sched: [1:1.00]
3652;
3653; HASWELL-LABEL: test_pmovsxbw:
3654; HASWELL: # BB#0:
3655; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
3656; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00]
3657; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3658; HASWELL-NEXT: retq # sched: [2:1.00]
3659;
3660; SKYLAKE-LABEL: test_pmovsxbw:
3661; SKYLAKE: # BB#0:
3662; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003663; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
3664; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3665; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003666;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003667; SKX-LABEL: test_pmovsxbw:
3668; SKX: # BB#0:
3669; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003670; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
3671; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3672; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003673;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003674; ZNVER1-LABEL: test_pmovsxbw:
3675; ZNVER1: # BB#0:
3676; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50]
3677; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50]
3678; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3679; ZNVER1-NEXT: retq # sched: [1:0.50]
3680 %1 = sext <16 x i8> %a0 to <16 x i16>
3681 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
3682 %3 = sext <16 x i8> %2 to <16 x i16>
3683 %4 = add <16 x i16> %1, %3
3684 ret <16 x i16> %4
3685}
3686
; test_pmovsxdq: sign-extends the four i32 lanes of %a0 and of the <4 x i32>
; loaded from %a1 to <4 x i64>, then adds the two results. The autogenerated
; check lines expect the register and memory forms of vpmovsxdq followed by
; vpaddq, each with its per-CPU "sched" annotation; the znver1 expectations
; list the memory form first.
3687define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) {
3688; GENERIC-LABEL: test_pmovsxdq:
3689; GENERIC: # BB#0:
3690; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
3691; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [5:1.00]
3692; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3693; GENERIC-NEXT: retq # sched: [1:1.00]
3694;
3695; HASWELL-LABEL: test_pmovsxdq:
3696; HASWELL: # BB#0:
3697; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
3698; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00]
3699; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3700; HASWELL-NEXT: retq # sched: [2:1.00]
3701;
3702; SKYLAKE-LABEL: test_pmovsxdq:
3703; SKYLAKE: # BB#0:
3704; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003705; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
3706; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3707; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003708;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003709; SKX-LABEL: test_pmovsxdq:
3710; SKX: # BB#0:
3711; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003712; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
3713; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3714; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003715;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003716; ZNVER1-LABEL: test_pmovsxdq:
3717; ZNVER1: # BB#0:
3718; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50]
3719; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50]
3720; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3721; ZNVER1-NEXT: retq # sched: [1:0.50]
3722 %1 = sext <4 x i32> %a0 to <4 x i64>
3723 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
3724 %3 = sext <4 x i32> %2 to <4 x i64>
3725 %4 = add <4 x i64> %1, %3
3726 ret <4 x i64> %4
3727}
3728
; test_pmovsxwd: sign-extends the eight i16 lanes of %a0 and of the <8 x i16>
; loaded from %a1 to <8 x i32>, then adds the two results. The autogenerated
; check lines expect the register and memory forms of vpmovsxwd followed by
; vpaddd, each with its per-CPU "sched" annotation; the znver1 expectations
; list the memory form first.
3729define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) {
3730; GENERIC-LABEL: test_pmovsxwd:
3731; GENERIC: # BB#0:
3732; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
3733; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [5:1.00]
3734; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3735; GENERIC-NEXT: retq # sched: [1:1.00]
3736;
3737; HASWELL-LABEL: test_pmovsxwd:
3738; HASWELL: # BB#0:
3739; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
3740; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00]
3741; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3742; HASWELL-NEXT: retq # sched: [2:1.00]
3743;
3744; SKYLAKE-LABEL: test_pmovsxwd:
3745; SKYLAKE: # BB#0:
3746; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003747; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
3748; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3749; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003750;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003751; SKX-LABEL: test_pmovsxwd:
3752; SKX: # BB#0:
3753; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003754; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
3755; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3756; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003757;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003758; ZNVER1-LABEL: test_pmovsxwd:
3759; ZNVER1: # BB#0:
3760; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50]
3761; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.25]
3762; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3763; ZNVER1-NEXT: retq # sched: [1:0.50]
3764 %1 = sext <8 x i16> %a0 to <8 x i32>
3765 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
3766 %3 = sext <8 x i16> %2 to <8 x i32>
3767 %4 = add <8 x i32> %1, %3
3768 ret <8 x i32> %4
3769}
3770
; test_pmovsxwq: takes the low four i16 lanes of %a0 and of the <8 x i16>
; loaded from %a1 (via shufflevector), sign-extends each to <4 x i64>, and adds
; the two results. The autogenerated check lines expect the register and
; memory forms of vpmovsxwq followed by vpaddq, each with its per-CPU "sched"
; annotation; the znver1 expectations list the memory form first.
3771define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) {
3772; GENERIC-LABEL: test_pmovsxwq:
3773; GENERIC: # BB#0:
3774; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
3775; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [5:1.00]
3776; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3777; GENERIC-NEXT: retq # sched: [1:1.00]
3778;
3779; HASWELL-LABEL: test_pmovsxwq:
3780; HASWELL: # BB#0:
3781; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
3782; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00]
3783; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3784; HASWELL-NEXT: retq # sched: [2:1.00]
3785;
3786; SKYLAKE-LABEL: test_pmovsxwq:
3787; SKYLAKE: # BB#0:
3788; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003789; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
3790; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3791; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003792;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003793; SKX-LABEL: test_pmovsxwq:
3794; SKX: # BB#0:
3795; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003796; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
3797; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3798; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003799;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003800; ZNVER1-LABEL: test_pmovsxwq:
3801; ZNVER1: # BB#0:
3802; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50]
3803; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.25]
3804; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3805; ZNVER1-NEXT: retq # sched: [1:0.50]
3806 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3807 %2 = sext <4 x i16> %1 to <4 x i64>
3808 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
3809 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3810 %5 = sext <4 x i16> %4 to <4 x i64>
3811 %6 = add <4 x i64> %2, %5
3812 ret <4 x i64> %6
3813}
3814
; test_pmovzxbd: takes the low eight i8 lanes of %a0 and of the <16 x i8>
; loaded from %a1 (via shufflevector), zero-extends each to <8 x i32>, and adds
; the two results. The autogenerated check lines expect the register and
; memory forms of vpmovzxbd (with their decoded shuffle comments) followed by
; vpaddd, each with its per-CPU "sched" annotation; the znver1 expectations
; list the memory form first.
3815define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) {
3816; GENERIC-LABEL: test_pmovzxbd:
3817; GENERIC: # BB#0:
3818; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
3819; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
3820; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3821; GENERIC-NEXT: retq # sched: [1:1.00]
3822;
3823; HASWELL-LABEL: test_pmovzxbd:
3824; HASWELL: # BB#0:
3825; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
3826; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00]
3827; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3828; HASWELL-NEXT: retq # sched: [2:1.00]
3829;
3830; SKYLAKE-LABEL: test_pmovzxbd:
3831; SKYLAKE: # BB#0:
3832; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003833; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
3834; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3835; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003836;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003837; SKX-LABEL: test_pmovzxbd:
3838; SKX: # BB#0:
3839; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003840; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
3841; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3842; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003843;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003844; ZNVER1-LABEL: test_pmovzxbd:
3845; ZNVER1: # BB#0:
3846; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50]
3847; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.25]
3848; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3849; ZNVER1-NEXT: retq # sched: [1:0.50]
3850 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3851 %2 = zext <8 x i8> %1 to <8 x i32>
3852 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
3853 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3854 %5 = zext <8 x i8> %4 to <8 x i32>
3855 %6 = add <8 x i32> %2, %5
3856 ret <8 x i32> %6
3857}
3858
; test_pmovzxbq: takes the low four i8 lanes of %a0 and of the <16 x i8>
; loaded from %a1 (via shufflevector), zero-extends each to <4 x i64>, and adds
; the two results. The autogenerated check lines expect the register and
; memory forms of vpmovzxbq (with their decoded shuffle comments) followed by
; vpaddq, each with its per-CPU "sched" annotation; the znver1 expectations
; list the memory form first.
3859define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) {
3860; GENERIC-LABEL: test_pmovzxbq:
3861; GENERIC: # BB#0:
3862; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
3863; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
3864; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3865; GENERIC-NEXT: retq # sched: [1:1.00]
3866;
3867; HASWELL-LABEL: test_pmovzxbq:
3868; HASWELL: # BB#0:
3869; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
3870; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
3871; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3872; HASWELL-NEXT: retq # sched: [2:1.00]
3873;
3874; SKYLAKE-LABEL: test_pmovzxbq:
3875; SKYLAKE: # BB#0:
3876; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003877; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
3878; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3879; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003880;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003881; SKX-LABEL: test_pmovzxbq:
3882; SKX: # BB#0:
3883; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003884; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
3885; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3886; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003887;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003888; ZNVER1-LABEL: test_pmovzxbq:
3889; ZNVER1: # BB#0:
3890; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
3891; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
3892; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3893; ZNVER1-NEXT: retq # sched: [1:0.50]
3894 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3895 %2 = zext <4 x i8> %1 to <4 x i64>
3896 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
3897 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3898 %5 = zext <4 x i8> %4 to <4 x i64>
3899 %6 = add <4 x i64> %2, %5
3900 ret <4 x i64> %6
3901}
3902
; test_pmovzxbw: zero-extends all sixteen i8 lanes of %a0 and of the
; <16 x i8> loaded from %a1 to <16 x i16>, then adds the two results. The
; autogenerated check lines expect the register and memory forms of vpmovzxbw
; (with their decoded shuffle comments) followed by vpaddw, each with its
; per-CPU "sched" annotation; the znver1 expectations list the memory form
; first.
3903define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) {
3904; GENERIC-LABEL: test_pmovzxbw:
3905; GENERIC: # BB#0:
3906; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
3907; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
3908; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3909; GENERIC-NEXT: retq # sched: [1:1.00]
3910;
3911; HASWELL-LABEL: test_pmovzxbw:
3912; HASWELL: # BB#0:
3913; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
3914; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00]
3915; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3916; HASWELL-NEXT: retq # sched: [2:1.00]
3917;
3918; SKYLAKE-LABEL: test_pmovzxbw:
3919; SKYLAKE: # BB#0:
3920; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003921; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
3922; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3923; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003924;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003925; SKX-LABEL: test_pmovzxbw:
3926; SKX: # BB#0:
3927; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003928; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
3929; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3930; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003931;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003932; ZNVER1-LABEL: test_pmovzxbw:
3933; ZNVER1: # BB#0:
3934; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50]
3935; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50]
3936; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3937; ZNVER1-NEXT: retq # sched: [1:0.50]
3938 %1 = zext <16 x i8> %a0 to <16 x i16>
3939 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
3940 %3 = zext <16 x i8> %2 to <16 x i16>
3941 %4 = add <16 x i16> %1, %3
3942 ret <16 x i16> %4
3943}
3944
; test_pmovzxdq: zero-extends the four i32 lanes of %a0 and of the <4 x i32>
; loaded from %a1 to <4 x i64>, then adds the two results. The autogenerated
; check lines expect the register and memory forms of vpmovzxdq (with their
; decoded shuffle comments) followed by vpaddq, each with its per-CPU "sched"
; annotation; the znver1 expectations list the memory form first.
3945define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) {
3946; GENERIC-LABEL: test_pmovzxdq:
3947; GENERIC: # BB#0:
3948; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
3949; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
3950; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3951; GENERIC-NEXT: retq # sched: [1:1.00]
3952;
3953; HASWELL-LABEL: test_pmovzxdq:
3954; HASWELL: # BB#0:
3955; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
3956; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00]
3957; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3958; HASWELL-NEXT: retq # sched: [2:1.00]
3959;
3960; SKYLAKE-LABEL: test_pmovzxdq:
3961; SKYLAKE: # BB#0:
3962; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003963; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
3964; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3965; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003966;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003967; SKX-LABEL: test_pmovzxdq:
3968; SKX: # BB#0:
3969; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003970; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
3971; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3972; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003973;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003974; ZNVER1-LABEL: test_pmovzxdq:
3975; ZNVER1: # BB#0:
3976; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
3977; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
3978; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3979; ZNVER1-NEXT: retq # sched: [1:0.50]
3980 %1 = zext <4 x i32> %a0 to <4 x i64>
3981 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
3982 %3 = zext <4 x i32> %2 to <4 x i64>
3983 %4 = add <4 x i64> %1, %3
3984 ret <4 x i64> %4
3985}
3986
; test_pmovzxwd: zero-extends the eight i16 lanes of %a0 and of the <8 x i16>
; loaded from %a1 to <8 x i32>, then adds the two results. The autogenerated
; check lines expect the register and memory forms of vpmovzxwd (with their
; decoded shuffle comments) followed by vpaddd, each with its per-CPU "sched"
; annotation; the znver1 expectations list the memory form first.
3987define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) {
3988; GENERIC-LABEL: test_pmovzxwd:
3989; GENERIC: # BB#0:
3990; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
3991; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
3992; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3993; GENERIC-NEXT: retq # sched: [1:1.00]
3994;
3995; HASWELL-LABEL: test_pmovzxwd:
3996; HASWELL: # BB#0:
3997; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
3998; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00]
3999; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4000; HASWELL-NEXT: retq # sched: [2:1.00]
4001;
4002; SKYLAKE-LABEL: test_pmovzxwd:
4003; SKYLAKE: # BB#0:
4004; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004005; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4006; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4007; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004008;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004009; SKX-LABEL: test_pmovzxwd:
4010; SKX: # BB#0:
4011; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004012; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4013; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4014; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004015;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004016; ZNVER1-LABEL: test_pmovzxwd:
4017; ZNVER1: # BB#0:
4018; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
4019; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
4020; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4021; ZNVER1-NEXT: retq # sched: [1:0.50]
4022 %1 = zext <8 x i16> %a0 to <8 x i32>
4023 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4024 %3 = zext <8 x i16> %2 to <8 x i32>
4025 %4 = add <8 x i32> %1, %3
4026 ret <8 x i32> %4
4027}
4028
; test_pmovzxwq: takes the low four i16 lanes of %a0 and of the <8 x i16>
; loaded from %a1 (via shufflevector), zero-extends each to <4 x i64>, and adds
; the two results. The autogenerated check lines expect the register and
; memory forms of vpmovzxwq (with their decoded shuffle comments) followed by
; vpaddq, each with its per-CPU "sched" annotation; the znver1 expectations
; list the memory form first.
4029define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4030; GENERIC-LABEL: test_pmovzxwq:
4031; GENERIC: # BB#0:
4032; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
4033; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
4034; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4035; GENERIC-NEXT: retq # sched: [1:1.00]
4036;
4037; HASWELL-LABEL: test_pmovzxwq:
4038; HASWELL: # BB#0:
4039; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4040; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [3:1.00]
4041; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4042; HASWELL-NEXT: retq # sched: [2:1.00]
4043;
4044; SKYLAKE-LABEL: test_pmovzxwq:
4045; SKYLAKE: # BB#0:
4046; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004047; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4048; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4049; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004050;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004051; SKX-LABEL: test_pmovzxwq:
4052; SKX: # BB#0:
4053; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004054; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4055; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4056; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004057;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004058; ZNVER1-LABEL: test_pmovzxwq:
4059; ZNVER1: # BB#0:
4060; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
4061; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
4062; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4063; ZNVER1-NEXT: retq # sched: [1:0.50]
4064 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4065 %2 = zext <4 x i16> %1 to <4 x i64>
4066 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4067 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4068 %5 = zext <4 x i16> %4 to <4 x i64>
4069 %6 = add <4 x i64> %2, %5
4070 ret <4 x i64> %6
4071}
4072
; test_pmuldq: calls the @llvm.x86.avx2.pmul.dq intrinsic on %a0 and %a1,
; bitcasts the <4 x i64> result back to <8 x i32>, and calls the intrinsic
; again with that value and the <8 x i32> loaded from %a2. The autogenerated
; check lines expect the register form of vpmuldq followed by the memory form,
; each with its per-CPU "sched" annotation. The intrinsic itself is declared
; on the line following the function body.
4073define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4074; GENERIC-LABEL: test_pmuldq:
4075; GENERIC: # BB#0:
4076; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4077; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4078; GENERIC-NEXT: retq # sched: [1:1.00]
4079;
4080; HASWELL-LABEL: test_pmuldq:
4081; HASWELL: # BB#0:
4082; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4083; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4084; HASWELL-NEXT: retq # sched: [2:1.00]
4085;
4086; SKYLAKE-LABEL: test_pmuldq:
4087; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004088; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004089; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4090; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004091;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004092; SKX-LABEL: test_pmuldq:
4093; SKX: # BB#0:
4094; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004095; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4096; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004097;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004098; ZNVER1-LABEL: test_pmuldq:
4099; ZNVER1: # BB#0:
4100; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4101; ZNVER1-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4102; ZNVER1-NEXT: retq # sched: [1:0.50]
4103 %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
4104 %2 = bitcast <4 x i64> %1 to <8 x i32>
4105 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
4106 %4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3)
4107 ret <4 x i64> %4
4108}
4109declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
4110
; test_pmulhrsw: calls the @llvm.x86.avx2.pmul.hr.sw intrinsic on %a0 and %a1,
; then calls it again with that result and the <16 x i16> loaded from %a2. The
; autogenerated check lines expect the register form of vpmulhrsw followed by
; the memory form, each with its per-CPU "sched" annotation. The intrinsic
; itself is declared on the line following the function body.
4111define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4112; GENERIC-LABEL: test_pmulhrsw:
4113; GENERIC: # BB#0:
4114; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4115; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4116; GENERIC-NEXT: retq # sched: [1:1.00]
4117;
4118; HASWELL-LABEL: test_pmulhrsw:
4119; HASWELL: # BB#0:
4120; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4121; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4122; HASWELL-NEXT: retq # sched: [2:1.00]
4123;
4124; SKYLAKE-LABEL: test_pmulhrsw:
4125; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004126; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004127; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4128; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004129;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004130; SKX-LABEL: test_pmulhrsw:
4131; SKX: # BB#0:
4132; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004133; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4134; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004135;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004136; ZNVER1-LABEL: test_pmulhrsw:
4137; ZNVER1: # BB#0:
4138; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4139; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4140; ZNVER1-NEXT: retq # sched: [1:0.50]
4141 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
4142 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4143 %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2)
4144 ret <16 x i16> %3
4145}
4146declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
4147
; Pins the `sched:` annotation printed for 256-bit vpmulhuw on every CPU model
; exercised by this file. The first intrinsic call uses the two register
; arguments; the second multiplies that result by a value loaded from %a2, so
; the folded-load form of the instruction is covered as well.
define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pmulhuw:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmulhuw:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pmulhuw:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKYLAKE-NEXT:    vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulhuw:
; SKX:       # BB#0:
; SKX-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT:    vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmulhuw:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2)
  ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
4184
; Pins the `sched:` annotation printed for 256-bit vpmulhw on every CPU model
; exercised by this file. Two chained intrinsic calls are used: the first on
; the register arguments, the second on that result and a load from %a2, which
; yields the register-register and folded-load instruction forms respectively.
define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pmulhw:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpmulhw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmulhw:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpmulhw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pmulhw:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKYLAKE-NEXT:    vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulhw:
; SKX:       # BB#0:
; SKX-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT:    vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmulhw:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2)
  ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
4221
; Pins the `sched:` annotation printed for 256-bit vpmulld on every CPU model
; exercised by this file. Plain IR `mul` on <8 x i32> is used rather than an
; intrinsic; the second multiply takes a loaded operand so the folded-load
; form of the instruction is emitted too.
define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmulld:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmulld:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
; HASWELL-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pmulld:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
; SKYLAKE-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulld:
; SKX:       # BB#0:
; SKX-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
; SKX-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmulld:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; ZNVER1-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = mul <8 x i32> %a0, %a1
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = mul <8 x i32> %1, %2
  ret <8 x i32> %3
}
4257
; Pins the `sched:` annotation printed for 256-bit vpmullw on every CPU model
; exercised by this file. Plain IR `mul` on <16 x i16> is used; the second
; multiply consumes a value loaded from %a2 so that the folded-load form of
; the instruction is covered alongside the register-register form.
define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pmullw:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmullw:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pmullw:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKYLAKE-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmullw:
; SKX:       # BB#0:
; SKX-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmullw:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = mul <16 x i16> %a0, %a1
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = mul <16 x i16> %1, %2
  ret <16 x i16> %3
}
4293
; Pins the `sched:` annotation printed for 256-bit vpmuludq on every CPU model
; exercised by this file. The intrinsic returns <4 x i64> but takes <8 x i32>
; operands, so the first result is bitcast back to <8 x i32> before being fed,
; together with a value loaded from %a2, into the second (folded-load) call.
define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmuludq:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmuludq:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pmuludq:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKYLAKE-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmuludq:
; SKX:       # BB#0:
; SKX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmuludq:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
  %2 = bitcast <4 x i64> %1 to <8 x i32>
  %3 = load <8 x i32>, <8 x i32> *%a2, align 32
  %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3)
  ret <4 x i64> %4
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
4331
; Pins the `sched:` annotation printed for 256-bit vpor (register-register and
; folded-load forms) on every CPU model exercised by this file. The OR result
; is additionally passed through an integer `add` with %a1, which shows up as
; the trailing vpaddq in every check block.
; NOTE(review): the extra add presumably keeps the OR in the integer domain so
; it is not rewritten to a floating-point logic op -- confirm.
define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_por:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_por:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_por:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_por:
; SKX:       # BB#0:
; SKX-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_por:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = or <4 x i64> %a0, %a1
  %2 = load <4 x i64>, <4 x i64> *%a2, align 32
  %3 = or <4 x i64> %1, %2
  %4 = add <4 x i64> %3, %a1
  ret <4 x i64> %4
}
4373
; Pins the `sched:` annotation printed for 256-bit vpsadbw on every CPU model
; exercised by this file. The intrinsic takes <32 x i8> operands and returns
; <4 x i64>, so the first result is bitcast back to <32 x i8> before being
; combined with a value loaded from %a2 in the second (folded-load) call.
define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psadbw:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psadbw:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psadbw:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_psadbw:
; SKX:       # BB#0:
; SKX-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psadbw:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = load <32 x i8>, <32 x i8> *%a2, align 32
  %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3)
  ret <4 x i64> %4
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
4411
; Pins the `sched:` annotation printed for 256-bit vpshufb on every CPU model
; exercised by this file. The intrinsic is applied first to the two register
; arguments and then to that result with a control vector loaded from %a2,
; giving the register-register and folded-load instruction forms.
define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pshufb:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufb:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pshufb:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pshufb:
; SKX:       # BB#0:
; SKX-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pshufb:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
  %2 = load <32 x i8>, <32 x i8> *%a2, align 32
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2)
  ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
4448
; Pins the `sched:` annotation printed for 256-bit vpshufd on every CPU model
; exercised by this file. One shuffle permutes the register argument and a
; second, different shuffle permutes a value loaded from %a1 (folded-load
; form); the two results are summed, which produces the trailing vpaddd.
; Note that the znver1 block expects the memory-operand shuffle first.
define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) {
; GENERIC-LABEL: test_pshufd:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [5:1.00]
; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufd:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; HASWELL-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00]
; HASWELL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pshufd:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; SKYLAKE-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
; SKYLAKE-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pshufd:
; SKX:       # BB#0:
; SKX-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
; SKX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pshufd:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50]
; ZNVER1-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25]
; ZNVER1-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %2 = load <8 x i32>, <8 x i32> *%a1, align 32
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %4 = add <8 x i32> %1, %3
  ret <8 x i32> %4
}
4490
; Pins the `sched:` annotation printed for 256-bit vpshufhw on every CPU model
; exercised by this file. Both shuffle masks leave elements 0-3 and 8-11 in
; place and permute only elements 4-7 and 12-15. One shuffle works on the
; register argument, the other on a value loaded from %a1 (folded-load form);
; the results are OR-ed together, which produces the trailing vpor.
; Note that the znver1 block expects the memory-operand shuffle first.
define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) {
; GENERIC-LABEL: test_pshufhw:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [5:1.00]
; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufhw:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
; HASWELL-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00]
; HASWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pshufhw:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
; SKYLAKE-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
; SKYLAKE-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pshufhw:
; SKX:       # BB#0:
; SKX-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
; SKX-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pshufhw:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50]
; ZNVER1-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25]
; ZNVER1-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
  %2 = load <16 x i16>, <16 x i16> *%a1, align 32
  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
  %4 = or <16 x i16> %1, %3
  ret <16 x i16> %4
}
4532
; Pins the `sched:` annotation printed for 256-bit vpshuflw on every CPU model
; exercised by this file. Both shuffle masks permute only elements 0-3 and
; 8-11 and leave elements 4-7 and 12-15 in place. One shuffle works on the
; register argument, the other on a value loaded from %a1 (folded-load form);
; the results are OR-ed together, which produces the trailing vpor.
; Note that the znver1 block expects the memory-operand shuffle first.
define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) {
; GENERIC-LABEL: test_pshuflw:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [5:1.00]
; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshuflw:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
; HASWELL-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00]
; HASWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pshuflw:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
; SKYLAKE-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
; SKYLAKE-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pshuflw:
; SKX:       # BB#0:
; SKX-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
; SKX-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pshuflw:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50]
; ZNVER1-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25]
; ZNVER1-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
  %2 = load <16 x i16>, <16 x i16> *%a1, align 32
  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
  %4 = or <16 x i16> %1, %3
  ret <16 x i16> %4
}
4574
; Pins the `sched:` annotation printed for 256-bit vpsignb on every CPU model
; exercised by this file. The intrinsic is called on the two register
; arguments and then again on that result with an operand loaded from %a2,
; covering the register-register and folded-load instruction forms.
define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psignb:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignb:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psignb:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_psignb:
; SKX:       # BB#0:
; SKX-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT:    vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psignb:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
  %2 = load <32 x i8>, <32 x i8> *%a2, align 32
  %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2)
  ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
4611
; Pins the `sched:` annotation printed for 256-bit vpsignd on every CPU model
; exercised by this file. Two chained intrinsic calls are used: one on the
; register arguments and one on that result with an operand loaded from %a2,
; so the folded-load form is checked next to the register-register form.
define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_psignd:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignd:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psignd:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_psignd:
; SKX:       # BB#0:
; SKX-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT:    vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psignd:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2)
  ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
4648
; Pins the `sched:` annotation printed for 256-bit vpsignw on every CPU model
; exercised by this file. The first intrinsic call takes the two register
; arguments; the second takes that result and an operand loaded from %a2,
; which yields the folded-load form of the instruction.
define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_psignw:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignw:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psignw:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_psignw:
; SKX:       # BB#0:
; SKX-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT:    vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psignw:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2)
  ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
4685
; Pins the `sched:` annotation printed for 256-bit vpslld on every CPU model
; exercised by this file, in three operand forms:
;   * shift count in an xmm register (first intrinsic call, count = %a1),
;   * shift count loaded from memory (second intrinsic call, count from %a2),
;   * immediate shift count (IR `shl` by a splat of 2).
; The count vector is <4 x i32> and is loaded with 16-byte alignment, unlike
; the 32-byte loads used by the other tests in this chunk.
define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_pslld:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpslld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pslld:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT:    vpslld (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT:    vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pslld:
; SKYLAKE:       # BB#0:
; SKYLAKE-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT:    vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT:    vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pslld:
; SKX:       # BB#0:
; SKX-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT:    vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT:    vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pslld:
; ZNVER1:       # BB#0:
; ZNVER1-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; ZNVER1-NEXT:    vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; ZNVER1-NEXT:    vpslld $2, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2)
  %4 = shl <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %4
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
4728
; Checks the scheduling annotation printed by -print-schedule for VPSLLDQ.
; The shufflevector shifts each 128-bit lane of %a0 left by three bytes,
; filling the vacated low bytes from the zero vector.
define <32 x i8> @test_pslldq(<32 x i8> %a0) {
; GENERIC-LABEL: test_pslldq:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pslldq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pslldq:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pslldq:
; SKX: # BB#0:
; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pslldq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
  ret <32 x i8> %1
}

; Checks the scheduling annotations printed by -print-schedule for VPSLLQ on
; ymm data with the shift count taken from an xmm register, from a folded
; 128-bit load, and from an immediate (the constant shl by 2).
define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; GENERIC-LABEL: test_psllq:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psllq:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psllq:
; SKX: # BB#0:
; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psllq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
  %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2)
  %4 = shl <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %4
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for the xmm
; form of VPSLLVD, once with a register shift-count vector and once with the
; shift-count vector folded from memory.
define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_psllvd:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllvd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psllvd:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psllvd:
; SKX: # BB#0:
; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psllvd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for the ymm
; form of VPSLLVD, once with a register shift-count vector and once with the
; shift-count vector folded from memory.
define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_psllvd_ymm:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllvd_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psllvd_ymm:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psllvd_ymm:
; SKX: # BB#0:
; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psllvd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2)
  ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for the xmm
; form of VPSLLVQ, once with a register shift-count vector and once with the
; shift-count vector folded from memory.
define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; GENERIC-LABEL: test_psllvq:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllvq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psllvq:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psllvq:
; SKX: # BB#0:
; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psllvq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
  %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2)
  ret <2 x i64> %3
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for the ymm
; form of VPSLLVQ, once with a register shift-count vector and once with the
; shift-count vector folded from memory.
define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_psllvq_ymm:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllvq_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psllvq_ymm:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psllvq_ymm:
; SKX: # BB#0:
; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psllvq_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
  %2 = load <4 x i64>, <4 x i64> *%a2, align 32
  %3 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2)
  ret <4 x i64> %3
}
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for VPSLLW on
; ymm data with the shift count taken from an xmm register, from a folded
; 128-bit load, and from an immediate (the constant shl by 2).
define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_psllw:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psllw:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psllw:
; SKX: # BB#0:
; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psllw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
  %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2)
  %4 = shl <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %4
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for VPSRAD on
; ymm data with the shift count taken from an xmm register, from a folded
; 128-bit load, and from an immediate (the constant ashr by 2).
define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_psrad:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrad:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psrad:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psrad:
; SKX: # BB#0:
; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psrad:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2)
  %4 = ashr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %4
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for the xmm
; form of VPSRAVD, once with a register shift-count vector and once with the
; shift-count vector folded from memory.
define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_psravd:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psravd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psravd:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psravd:
; SKX: # BB#0:
; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psravd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for the ymm
; form of VPSRAVD, once with a register shift-count vector and once with the
; shift-count vector folded from memory.
define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_psravd_ymm:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psravd_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psravd_ymm:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psravd_ymm:
; SKX: # BB#0:
; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psravd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2)
  ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for VPSRAW on
; ymm data with the shift count taken from an xmm register, from a folded
; 128-bit load, and from an immediate (the constant ashr by 2).
define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_psraw:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psraw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psraw:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psraw:
; SKX: # BB#0:
; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psraw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
  %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2)
  %4 = ashr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %4
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for VPSRLD on
; ymm data with the shift count taken from an xmm register, from a folded
; 128-bit load, and from an immediate (the constant lshr by 2).
define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_psrld:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrld:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psrld:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psrld:
; SKX: # BB#0:
; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psrld:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2)
  %4 = lshr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %4
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone

; Checks the scheduling annotation printed by -print-schedule for VPSRLDQ.
; The shufflevector shifts each 128-bit lane of %a0 right by three bytes,
; filling the vacated high bytes from the zero vector.
define <32 x i8> @test_psrldq(<32 x i8> %a0) {
; GENERIC-LABEL: test_psrldq:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrldq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psrldq:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psrldq:
; SKX: # BB#0:
; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psrldq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
  ret <32 x i8> %1
}

; Checks the scheduling annotations printed by -print-schedule for VPSRLQ on
; ymm data with the shift count taken from an xmm register, from a folded
; 128-bit load, and from an immediate (the constant lshr by 2).
define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; GENERIC-LABEL: test_psrlq:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrlq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psrlq:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psrlq:
; SKX: # BB#0:
; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psrlq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
  %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2)
  %4 = lshr <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %4
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for the xmm
; form of VPSRLVD, once with a register shift-count vector and once with the
; shift-count vector folded from memory.
define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_psrlvd:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrlvd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psrlvd:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psrlvd:
; SKX: # BB#0:
; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psrlvd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone

; Checks the scheduling annotations printed by -print-schedule for the ymm
; form of VPSRLVD, once with a register shift-count vector and once with the
; shift-count vector folded from memory.
define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_psrlvd_ymm:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrlvd_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_psrlvd_ymm:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_psrlvd_ymm:
; SKX: # BB#0:
; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psrlvd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2)
  ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone

; Scheduling test for @llvm.x86.avx2.psrlv.q on <2 x i64> (xmm) operands.
; First call uses two register operands; the second call feeds the first
; result plus a 16-byte-aligned load from %a2, so the expected output holds
; both the register form and the (%rdi) memory form of vpsrlvq.
; NOTE(review): in the Haswell expectations the memory form carries the same
; "sched" pair as the register form ([1:1.00]), unlike the other CPUs where the
; load form has a larger first value - confirm against the Haswell model.
5341define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5342; GENERIC-LABEL: test_psrlvq:
5343; GENERIC: # BB#0:
5344; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5345; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5346; GENERIC-NEXT: retq # sched: [1:1.00]
5347;
5348; HASWELL-LABEL: test_psrlvq:
5349; HASWELL: # BB#0:
5350; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5351; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
5352; HASWELL-NEXT: retq # sched: [2:1.00]
5353;
5354; SKYLAKE-LABEL: test_psrlvq:
5355; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005356; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5357; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5358; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005359;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005360; SKX-LABEL: test_psrlvq:
5361; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005362; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5363; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5364; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005365;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005366; ZNVER1-LABEL: test_psrlvq:
5367; ZNVER1: # BB#0:
5368; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5369; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5370; ZNVER1-NEXT: retq # sched: [1:0.50]
5371 %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
5372 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5373 %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2)
5374 ret <2 x i64> %3
5375}
; Intrinsic exercised by @test_psrlvq: two <2 x i64> in, <2 x i64> out.
5376declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
5377
; Scheduling test for @llvm.x86.avx2.psrlv.q.256 on <4 x i64> (ymm) operands.
; The intrinsic is applied to two registers, then to that result and a
; 32-byte-aligned load from %a2, giving one register-form and one
; memory-form vpsrlvq in each CPU's expected output.
; NOTE(review): the Haswell expectations list [1:1.00] for both forms, whereas
; every other CPU shows a larger first value for the load form - confirm.
5378define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5379; GENERIC-LABEL: test_psrlvq_ymm:
5380; GENERIC: # BB#0:
5381; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5382; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5383; GENERIC-NEXT: retq # sched: [1:1.00]
5384;
5385; HASWELL-LABEL: test_psrlvq_ymm:
5386; HASWELL: # BB#0:
5387; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5388; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5389; HASWELL-NEXT: retq # sched: [2:1.00]
5390;
5391; SKYLAKE-LABEL: test_psrlvq_ymm:
5392; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005393; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5394; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5395; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005396;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005397; SKX-LABEL: test_psrlvq_ymm:
5398; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005399; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5400; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5401; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005402;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005403; ZNVER1-LABEL: test_psrlvq_ymm:
5404; ZNVER1: # BB#0:
5405; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5406; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5407; ZNVER1-NEXT: retq # sched: [1:0.50]
5408 %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
5409 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5410 %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2)
5411 ret <4 x i64> %3
5412}
; Intrinsic exercised by @test_psrlvq_ymm: two <4 x i64> in, <4 x i64> out.
5413declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
5414
; Scheduling test covering three forms of vpsrlw on <16 x i16> (ymm) data:
;   1. @llvm.x86.avx2.psrl.w with the <8 x i16> count in a register (xmm1),
;   2. the same intrinsic with the count loaded from %a2 (the (%rdi) form),
;   3. an IR lshr by a splat constant 2, expected as the immediate form "$2".
; Note the count operand is an <8 x i16> (xmm) value while the shifted data is
; <16 x i16> (ymm), as the signature and the register-form check line show.
; NOTE(review): the Haswell expectations list [4:1.00] for the register-count
; form but only [1:1.00] for the memory-count form, i.e. the load form has the
; smaller first value; all other CPUs show the opposite ordering - confirm.
5415define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5416; GENERIC-LABEL: test_psrlw:
5417; GENERIC: # BB#0:
5418; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5419; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5420; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
5421; GENERIC-NEXT: retq # sched: [1:1.00]
5422;
5423; HASWELL-LABEL: test_psrlw:
5424; HASWELL: # BB#0:
5425; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5426; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5427; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
5428; HASWELL-NEXT: retq # sched: [2:1.00]
5429;
5430; SKYLAKE-LABEL: test_psrlw:
5431; SKYLAKE: # BB#0:
5432; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005433; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5434; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
5435; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005436;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005437; SKX-LABEL: test_psrlw:
5438; SKX: # BB#0:
5439; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005440; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5441; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
5442; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005443;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005444; ZNVER1-LABEL: test_psrlw:
5445; ZNVER1: # BB#0:
5446; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5447; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5448; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25]
5449; ZNVER1-NEXT: retq # sched: [1:0.50]
5450 %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
5451 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5452 %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2)
5453 %4 = lshr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
5454 ret <16 x i16> %4
5455}
; Intrinsic exercised by @test_psrlw: <16 x i16> data, <8 x i16> count.
5456declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
5457
; Scheduling test for a plain IR sub on <32 x i8> (ymm) values.
; %a1 is subtracted from %a0 in registers, then a 32-byte-aligned load from
; %a2 is subtracted from that result, so each CPU's expected output holds the
; register form and the (%rdi) memory form of vpsubb.
; NOTE(review): the Haswell expectations give both forms [1:0.50]; the other
; CPUs report a larger first "sched" value for the load form - confirm.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005458define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00005459; GENERIC-LABEL: test_psubb:
5460; GENERIC: # BB#0:
5461; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5462; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5463; GENERIC-NEXT: retq # sched: [1:1.00]
5464;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005465; HASWELL-LABEL: test_psubb:
5466; HASWELL: # BB#0:
5467; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00005468; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5469; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005470;
Gadi Haber767d98b2017-08-30 08:08:50 +00005471; SKYLAKE-LABEL: test_psubb:
5472; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005473; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5474; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5475; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00005476;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005477; SKX-LABEL: test_psubb:
5478; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005479; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5480; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5481; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005482;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005483; ZNVER1-LABEL: test_psubb:
5484; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00005485; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5486; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00005487; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005488 %1 = sub <32 x i8> %a0, %a1
5489 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5490 %3 = sub <32 x i8> %1, %2
5491 ret <32 x i8> %3
5492}
5493
; Scheduling test for a plain IR sub on <8 x i32> (ymm) values.
; The first sub uses two register arguments; the second subtracts a
; 32-byte-aligned load from %a2, so the expected output contains vpsubd in
; both its register form and its (%rdi) memory form for every CPU.
; NOTE(review): the Haswell expectations give both forms [1:0.50]; the other
; CPUs report a larger first "sched" value for the load form - confirm.
5494define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00005495; GENERIC-LABEL: test_psubd:
5496; GENERIC: # BB#0:
5497; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5498; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5499; GENERIC-NEXT: retq # sched: [1:1.00]
5500;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005501; HASWELL-LABEL: test_psubd:
5502; HASWELL: # BB#0:
5503; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00005504; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5505; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005506;
Gadi Haber767d98b2017-08-30 08:08:50 +00005507; SKYLAKE-LABEL: test_psubd:
5508; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005509; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5510; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5511; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00005512;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005513; SKX-LABEL: test_psubd:
5514; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005515; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5516; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5517; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005518;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005519; ZNVER1-LABEL: test_psubd:
5520; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00005521; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5522; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00005523; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005524 %1 = sub <8 x i32> %a0, %a1
5525 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5526 %3 = sub <8 x i32> %1, %2
5527 ret <8 x i32> %3
5528}
5529
; Scheduling test for a plain IR sub on <4 x i64> (ymm) values.
; One register-register sub is followed by a sub whose right-hand operand is
; a 32-byte-aligned load from %a2; the expected output therefore shows
; vpsubq with a register source and with a (%rdi) memory source.
; NOTE(review): the Haswell expectations give both forms [1:0.50]; the other
; CPUs report a larger first "sched" value for the load form - confirm.
5530define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00005531; GENERIC-LABEL: test_psubq:
5532; GENERIC: # BB#0:
5533; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5534; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5535; GENERIC-NEXT: retq # sched: [1:1.00]
5536;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005537; HASWELL-LABEL: test_psubq:
5538; HASWELL: # BB#0:
5539; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00005540; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5541; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005542;
Gadi Haber767d98b2017-08-30 08:08:50 +00005543; SKYLAKE-LABEL: test_psubq:
5544; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005545; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5546; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5547; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00005548;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005549; SKX-LABEL: test_psubq:
5550; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005551; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5552; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5553; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005554;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005555; ZNVER1-LABEL: test_psubq:
5556; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00005557; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5558; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00005559; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005560 %1 = sub <4 x i64> %a0, %a1
5561 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5562 %3 = sub <4 x i64> %1, %2
5563 ret <4 x i64> %3
5564}
5565
; Scheduling test for @llvm.x86.avx2.psubs.b on <32 x i8> (ymm) operands.
; The intrinsic is called with two registers, then with the first result and
; a 32-byte-aligned load from %a2; the expected output is vpsubsb in its
; register form followed by its (%rdi) memory form.
; NOTE(review): the Haswell expectations give both forms [1:0.50]; the other
; CPUs report a larger first "sched" value for the load form - confirm.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005566define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5567; GENERIC-LABEL: test_psubsb:
5568; GENERIC: # BB#0:
5569; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5570; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5571; GENERIC-NEXT: retq # sched: [1:1.00]
5572;
5573; HASWELL-LABEL: test_psubsb:
5574; HASWELL: # BB#0:
5575; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5576; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5577; HASWELL-NEXT: retq # sched: [2:1.00]
5578;
5579; SKYLAKE-LABEL: test_psubsb:
5580; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005581; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5582; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5583; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005584;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005585; SKX-LABEL: test_psubsb:
5586; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005587; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5588; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5589; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005590;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005591; ZNVER1-LABEL: test_psubsb:
5592; ZNVER1: # BB#0:
5593; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5594; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5595; ZNVER1-NEXT: retq # sched: [1:0.50]
5596 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1)
5597 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5598 %3 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %1, <32 x i8> %2)
5599 ret <32 x i8> %3
5600}
; Intrinsic exercised by @test_psubsb: two <32 x i8> in, <32 x i8> out.
5601declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
5602
; Scheduling test for @llvm.x86.avx2.psubs.w on <16 x i16> (ymm) operands.
; A register-register call is chained into a call whose second operand is a
; 32-byte-aligned load from %a2, so each CPU's expected output lists vpsubsw
; once with a register source and once with a (%rdi) memory source.
; NOTE(review): the Haswell expectations give both forms [1:0.50]; the other
; CPUs report a larger first "sched" value for the load form - confirm.
5603define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
5604; GENERIC-LABEL: test_psubsw:
5605; GENERIC: # BB#0:
5606; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5607; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5608; GENERIC-NEXT: retq # sched: [1:1.00]
5609;
5610; HASWELL-LABEL: test_psubsw:
5611; HASWELL: # BB#0:
5612; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5613; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5614; HASWELL-NEXT: retq # sched: [2:1.00]
5615;
5616; SKYLAKE-LABEL: test_psubsw:
5617; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005618; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5619; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5620; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005621;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005622; SKX-LABEL: test_psubsw:
5623; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005624; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5625; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5626; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005627;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005628; ZNVER1-LABEL: test_psubsw:
5629; ZNVER1: # BB#0:
5630; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5631; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5632; ZNVER1-NEXT: retq # sched: [1:0.50]
5633 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1)
5634 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
5635 %3 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %1, <16 x i16> %2)
5636 ret <16 x i16> %3
5637}
; Intrinsic exercised by @test_psubsw: two <16 x i16> in, <16 x i16> out.
5638declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
5639
; Scheduling test for @llvm.x86.avx2.psubus.b on <32 x i8> (ymm) operands.
; The intrinsic runs on two registers and then on that result plus a
; 32-byte-aligned load from %a2, producing the register form and the (%rdi)
; memory form of vpsubusb in the expected output.
; NOTE(review): the Haswell expectations give both forms [1:0.50]; the other
; CPUs report a larger first "sched" value for the load form - confirm.
5640define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5641; GENERIC-LABEL: test_psubusb:
5642; GENERIC: # BB#0:
5643; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5644; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5645; GENERIC-NEXT: retq # sched: [1:1.00]
5646;
5647; HASWELL-LABEL: test_psubusb:
5648; HASWELL: # BB#0:
5649; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5650; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5651; HASWELL-NEXT: retq # sched: [2:1.00]
5652;
5653; SKYLAKE-LABEL: test_psubusb:
5654; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005655; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5656; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5657; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005658;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005659; SKX-LABEL: test_psubusb:
5660; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005661; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5662; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5663; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005664;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005665; ZNVER1-LABEL: test_psubusb:
5666; ZNVER1: # BB#0:
5667; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5668; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5669; ZNVER1-NEXT: retq # sched: [1:0.50]
5670 %1 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1)
5671 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5672 %3 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %1, <32 x i8> %2)
5673 ret <32 x i8> %3
5674}
; Intrinsic exercised by @test_psubusb: two <32 x i8> in, <32 x i8> out.
5675declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
5676
; Scheduling test for @llvm.x86.avx2.psubus.w on <16 x i16> (ymm) operands.
; Two chained calls are made: registers only, then the previous result with a
; 32-byte-aligned load from %a2. The expected output is therefore vpsubusw
; with a register source followed by vpsubusw with a (%rdi) memory source.
; NOTE(review): the Haswell expectations give both forms [1:0.50]; the other
; CPUs report a larger first "sched" value for the load form - confirm.
5677define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
5678; GENERIC-LABEL: test_psubusw:
5679; GENERIC: # BB#0:
5680; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5681; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5682; GENERIC-NEXT: retq # sched: [1:1.00]
5683;
5684; HASWELL-LABEL: test_psubusw:
5685; HASWELL: # BB#0:
5686; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5687; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5688; HASWELL-NEXT: retq # sched: [2:1.00]
5689;
5690; SKYLAKE-LABEL: test_psubusw:
5691; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005692; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5693; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5694; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005695;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005696; SKX-LABEL: test_psubusw:
5697; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005698; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5699; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5700; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005701;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005702; ZNVER1-LABEL: test_psubusw:
5703; ZNVER1: # BB#0:
5704; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5705; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5706; ZNVER1-NEXT: retq # sched: [1:0.50]
5707 %1 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1)
5708 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
5709 %3 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %1, <16 x i16> %2)
5710 ret <16 x i16> %3
5711}
; Intrinsic exercised by @test_psubusw: two <16 x i16> in, <16 x i16> out.
5712declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
5713
; Scheduling test for a plain IR sub on <16 x i16> (ymm) values.
; %a1 is subtracted from %a0 in registers and a 32-byte-aligned load from %a2
; is then subtracted from the result, so the expected output contains the
; register form and the (%rdi) memory form of vpsubw for every CPU.
; NOTE(review): the Haswell expectations give both forms [1:0.50]; the other
; CPUs report a larger first "sched" value for the load form - confirm.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005714define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00005715; GENERIC-LABEL: test_psubw:
5716; GENERIC: # BB#0:
5717; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5718; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5719; GENERIC-NEXT: retq # sched: [1:1.00]
5720;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005721; HASWELL-LABEL: test_psubw:
5722; HASWELL: # BB#0:
5723; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00005724; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5725; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005726;
Gadi Haber767d98b2017-08-30 08:08:50 +00005727; SKYLAKE-LABEL: test_psubw:
5728; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005729; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5730; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5731; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00005732;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005733; SKX-LABEL: test_psubw:
5734; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005735; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5736; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5737; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005738;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005739; ZNVER1-LABEL: test_psubw:
5740; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00005741; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5742; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00005743; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005744 %1 = sub <16 x i16> %a0, %a1
5745 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
5746 %3 = sub <16 x i16> %1, %2
5747 ret <16 x i16> %3
5748}
5749
; Scheduling test for vpunpckhbw on <32 x i8> (ymm) values, written as IR
; shufflevector rather than an intrinsic call.
; The mask <8,40,9,41,...,15,47,24,56,...,31,63> interleaves elements 8-15 and
; 24-31 of the first operand with the same positions of the second operand
; (mask index 32+k selects element k of the second operand).
; The shuffle is applied to (%a0, %a1) and then to (that result, load from
; %a2), giving one register-form and one memory-form vpunpckhbw.
; NOTE(review): the Haswell expectations give both forms [1:1.00]; the other
; CPUs report a larger first "sched" value for the load form - confirm.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005750define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5751; GENERIC-LABEL: test_punpckhbw:
5752; GENERIC: # BB#0:
5753; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
5754; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [5:1.00]
5755; GENERIC-NEXT: retq # sched: [1:1.00]
5756;
5757; HASWELL-LABEL: test_punpckhbw:
5758; HASWELL: # BB#0:
5759; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
5760; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00]
5761; HASWELL-NEXT: retq # sched: [2:1.00]
5762;
5763; SKYLAKE-LABEL: test_punpckhbw:
5764; SKYLAKE: # BB#0:
5765; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005766; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
5767; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005768;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005769; SKX-LABEL: test_punpckhbw:
5770; SKX: # BB#0:
5771; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005772; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
5773; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005774;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005775; ZNVER1-LABEL: test_punpckhbw:
5776; ZNVER1: # BB#0:
5777; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25]
5778; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50]
5779; ZNVER1-NEXT: retq # sched: [1:0.50]
5780 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
5781 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5782 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
5783 ret <32 x i8> %3
5784}
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005785
; Scheduling test for vpunpckhdq on <8 x i32> (ymm) values, written as IR
; shufflevector with mask <2,10,3,11,6,14,7,15>: elements 2,3 and 6,7 of the
; first operand interleaved with the same positions of the second operand.
; The shuffle is applied to (%a0, %a1) and then to (that result, load from
; %a2), giving one register-form and one memory-form vpunpckhdq.
; The trailing add of a splat 1 appears in the expected output as
; vpcmpeqd %ymm1,%ymm1,%ymm1 followed by vpsubd.
; NOTE(review): presumably the integer add is there to keep the shuffle in the
; integer domain so vpunpckhdq is selected instead of a floating-point
; unpack - confirm with the test author / X86 domain-fixing pass.
; NOTE(review): the Haswell expectations give both vpunpckhdq forms [1:1.00];
; the other CPUs report a larger first "sched" value for the load form.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005786define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5787; GENERIC-LABEL: test_punpckhdq:
5788; GENERIC: # BB#0:
5789; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
5790; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
5791; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
5792; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5793; GENERIC-NEXT: retq # sched: [1:1.00]
5794;
5795; HASWELL-LABEL: test_punpckhdq:
5796; HASWELL: # BB#0:
5797; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
5798; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00]
5799; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
5800; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5801; HASWELL-NEXT: retq # sched: [2:1.00]
5802;
5803; SKYLAKE-LABEL: test_punpckhdq:
5804; SKYLAKE: # BB#0:
5805; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005806; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
5807; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
5808; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5809; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005810;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005811; SKX-LABEL: test_punpckhdq:
5812; SKX: # BB#0:
5813; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005814; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
5815; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
5816; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5817; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005818;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005819; ZNVER1-LABEL: test_punpckhdq:
5820; ZNVER1: # BB#0:
5821; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25]
5822; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
5823; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
5824; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5825; ZNVER1-NEXT: retq # sched: [1:0.50]
5826 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
5827 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5828 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
5829 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
5830 ret <8 x i32> %4
5831}
5832
; Scheduling test for vpunpckhqdq on <4 x i64> (ymm) values, written as IR
; shufflevector with mask <1,5,3,7>: elements 1 and 3 of the first operand
; interleaved with elements 1 and 3 of the second operand.
; Unlike a chained test, both shuffles take %a0 as their first operand: one
; pairs it with %a1 (register form, result in ymm1) and the other with the
; load from %a2 (memory form, result in ymm0). The two results are then
; combined by an IR add, which appears as vpaddq in the expected output.
; NOTE(review): presumably the integer add keeps the shuffles in the integer
; domain so vpunpckhqdq is selected rather than a floating-point unpack -
; confirm with the test author / X86 domain-fixing pass.
; NOTE(review): the Haswell expectations give both vpunpckhqdq forms
; [1:1.00]; the other CPUs report a larger first "sched" value for the load.
5833define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5834; GENERIC-LABEL: test_punpckhqdq:
5835; GENERIC: # BB#0:
5836; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
5837; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [5:1.00]
5838; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5839; GENERIC-NEXT: retq # sched: [1:1.00]
5840;
5841; HASWELL-LABEL: test_punpckhqdq:
5842; HASWELL: # BB#0:
5843; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
5844; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00]
5845; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
5846; HASWELL-NEXT: retq # sched: [2:1.00]
5847;
5848; SKYLAKE-LABEL: test_punpckhqdq:
5849; SKYLAKE: # BB#0:
5850; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005851; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
5852; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
5853; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005854;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005855; SKX-LABEL: test_punpckhqdq:
5856; SKX: # BB#0:
5857; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005858; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
5859; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
5860; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005861;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005862; ZNVER1-LABEL: test_punpckhqdq:
5863; ZNVER1: # BB#0:
5864; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25]
5865; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50]
5866; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
5867; ZNVER1-NEXT: retq # sched: [1:0.50]
5868 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
5869 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5870 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
5871 %4 = add <4 x i64> %1, %3
5872 ret <4 x i64> %4
5873}
5874
; Scheduling test for the 256-bit vpunpckhwd instruction.
; The shuffle mask interleaves elements 4-7 and 12-15 of the first operand
; with the same positions of the second operand. The two shuffles are chained:
; the first combines %a0 with %a1, and its result is then combined with a
; vector loaded from %a2.
; The expected output for every check prefix is a register-form vpunpckhwd
; followed by a memory-form vpunpckhwd, each with the scheduling annotation
; printed for the CPU model that prefix is run with.
5875define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
5876; GENERIC-LABEL: test_punpckhwd:
5877; GENERIC: # BB#0:
5878; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
5879; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [5:1.00]
5880; GENERIC-NEXT: retq # sched: [1:1.00]
5881;
5882; HASWELL-LABEL: test_punpckhwd:
5883; HASWELL: # BB#0:
5884; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
5885; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00]
5886; HASWELL-NEXT: retq # sched: [2:1.00]
5887;
5888; SKYLAKE-LABEL: test_punpckhwd:
5889; SKYLAKE: # BB#0:
5890; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005891; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
5892; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005893;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005894; SKX-LABEL: test_punpckhwd:
5895; SKX: # BB#0:
5896; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005897; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
5898; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005899;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005900; ZNVER1-LABEL: test_punpckhwd:
5901; ZNVER1: # BB#0:
5902; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25]
5903; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50]
5904; ZNVER1-NEXT: retq # sched: [1:0.50]
; The second shuffle consumes the result of the first (%1), so the two
; unpack operations form a dependency chain ending in the returned value.
5905 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
5906 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
5907 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
5908 ret <16 x i16> %3
5909}
5910
; Scheduling test for the 256-bit vpunpcklbw instruction.
; The shuffle mask interleaves elements 0-7 and 16-23 of the first operand
; with the same positions of the second operand. The two shuffles are chained:
; the first combines %a0 with %a1, and its result is then combined with a
; vector loaded from %a2.
; The expected output for every check prefix is a register-form vpunpcklbw
; followed by a memory-form vpunpcklbw, each with the scheduling annotation
; printed for the CPU model that prefix is run with.
5911define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5912; GENERIC-LABEL: test_punpcklbw:
5913; GENERIC: # BB#0:
5914; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
5915; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [5:1.00]
5916; GENERIC-NEXT: retq # sched: [1:1.00]
5917;
5918; HASWELL-LABEL: test_punpcklbw:
5919; HASWELL: # BB#0:
5920; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
5921; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00]
5922; HASWELL-NEXT: retq # sched: [2:1.00]
5923;
5924; SKYLAKE-LABEL: test_punpcklbw:
5925; SKYLAKE: # BB#0:
5926; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005927; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
5928; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005929;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005930; SKX-LABEL: test_punpcklbw:
5931; SKX: # BB#0:
5932; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005933; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
5934; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005935;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005936; ZNVER1-LABEL: test_punpcklbw:
5937; ZNVER1: # BB#0:
5938; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25]
5939; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50]
5940; ZNVER1-NEXT: retq # sched: [1:0.50]
; The second shuffle consumes the result of the first (%1), so the two
; unpack operations form a dependency chain ending in the returned value.
5941 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
5942 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5943 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
5944 ret <32 x i8> %3
5945}
5946
; Scheduling test for the 256-bit vpunpckldq instruction.
; The shuffle mask interleaves elements 0, 1, 4 and 5 of the first operand
; with the same positions of the second operand. The two shuffles are chained:
; the first combines %a0 with %a1, and its result is then combined with a
; vector loaded from %a2. A splat of 1 is added to the final shuffle result.
; The expected output for every check prefix is a register-form vpunpckldq,
; a memory-form vpunpckldq, then vpcmpeqd of a register with itself followed
; by vpsubd, which is how the "add 1" is emitted here (subtracting all-ones).
; Each instruction carries the scheduling annotation printed for the CPU
; model that prefix is run with.
5947define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5948; GENERIC-LABEL: test_punpckldq:
5949; GENERIC: # BB#0:
5950; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
5951; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00]
5952; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
5953; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5954; GENERIC-NEXT: retq # sched: [1:1.00]
5955;
5956; HASWELL-LABEL: test_punpckldq:
5957; HASWELL: # BB#0:
5958; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
5959; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00]
5960; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
5961; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5962; HASWELL-NEXT: retq # sched: [2:1.00]
5963;
5964; SKYLAKE-LABEL: test_punpckldq:
5965; SKYLAKE: # BB#0:
5966; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005967; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
5968; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
5969; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5970; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005971;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005972; SKX-LABEL: test_punpckldq:
5973; SKX: # BB#0:
5974; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005975; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
5976; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
5977; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5978; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005979;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005980; ZNVER1-LABEL: test_punpckldq:
5981; ZNVER1: # BB#0:
5982; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25]
5983; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
5984; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
5985; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5986; ZNVER1-NEXT: retq # sched: [1:0.50]
; The second shuffle consumes the result of the first (%1).
; NOTE(review): the trailing integer add presumably forces the shuffles into
; the integer execution domain so vpunpckldq is selected - confirm.
5987 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
5988 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5989 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
5990 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
5991 ret <8 x i32> %4
5992}
5993
; Scheduling test for the 256-bit vpunpcklqdq instruction.
; The shuffle mask <0, 4, 2, 6> interleaves elements 0 and 2 of the first
; operand with elements 0 and 2 of the second operand. The function performs
; that shuffle twice on %a0: once against the register operand %a1 and once
; against a vector loaded from %a2, then adds the two results.
; The expected output for every check prefix is a register-form vpunpcklqdq,
; a memory-form vpunpcklqdq and a vpaddq, each followed by the scheduling
; annotation printed for the CPU model that prefix is run with.
5994define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5995; GENERIC-LABEL: test_punpcklqdq:
5996; GENERIC: # BB#0:
5997; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
5998; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [5:1.00]
5999; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
6000; GENERIC-NEXT: retq # sched: [1:1.00]
6001;
6002; HASWELL-LABEL: test_punpcklqdq:
6003; HASWELL: # BB#0:
6004; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6005; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00]
6006; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6007; HASWELL-NEXT: retq # sched: [2:1.00]
6008;
6009; SKYLAKE-LABEL: test_punpcklqdq:
6010; SKYLAKE: # BB#0:
6011; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006012; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6013; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6014; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006015;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006016; SKX-LABEL: test_punpcklqdq:
6017; SKX: # BB#0:
6018; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006019; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6020; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6021; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006022;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006023; ZNVER1-LABEL: test_punpcklqdq:
6024; ZNVER1: # BB#0:
6025; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25]
6026; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50]
6027; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
6028; ZNVER1-NEXT: retq # sched: [1:0.50]
; Both shuffles use %a0 as their first operand; only the second operand
; differs (the register argument %a1 versus the vector loaded from %a2).
; NOTE(review): the integer add presumably keeps both shuffles live and in
; the integer execution domain so vpunpcklqdq is selected - confirm.
6029 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
6030 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6031 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
6032 %4 = add <4 x i64> %1, %3
6033 ret <4 x i64> %4
6034}
6035
; Scheduling test for the 256-bit vpunpcklwd instruction.
; The shuffle mask interleaves elements 0-3 and 8-11 of the first operand
; with the same positions of the second operand. The two shuffles are chained:
; the first combines %a0 with %a1, and its result is then combined with a
; vector loaded from %a2.
; The expected output for every check prefix is a register-form vpunpcklwd
; followed by a memory-form vpunpcklwd, each with the scheduling annotation
; printed for the CPU model that prefix is run with.
6036define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6037; GENERIC-LABEL: test_punpcklwd:
6038; GENERIC: # BB#0:
6039; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
6040; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [5:1.00]
6041; GENERIC-NEXT: retq # sched: [1:1.00]
6042;
6043; HASWELL-LABEL: test_punpcklwd:
6044; HASWELL: # BB#0:
6045; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
6046; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00]
6047; HASWELL-NEXT: retq # sched: [2:1.00]
6048;
6049; SKYLAKE-LABEL: test_punpcklwd:
6050; SKYLAKE: # BB#0:
6051; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006052; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
6053; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006054;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006055; SKX-LABEL: test_punpcklwd:
6056; SKX: # BB#0:
6057; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006058; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
6059; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006060;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006061; ZNVER1-LABEL: test_punpcklwd:
6062; ZNVER1: # BB#0:
6063; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25]
6064; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50]
6065; ZNVER1-NEXT: retq # sched: [1:0.50]
; The second shuffle consumes the result of the first (%1), so the two
; unpack operations form a dependency chain ending in the returned value.
6066 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
6067 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6068 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
6069 ret <16 x i16> %3
6070}
6071
; Scheduling test for the 256-bit vpxor instruction.
; The function xors %a0 with %a1, xors that result with a vector loaded from
; %a2, and finally adds %a1 to the result.
; The expected output for every check prefix is a register-form vpxor, a
; memory-form vpxor reading (%rdi), and a vpaddq, each followed by the
; scheduling annotation printed for the CPU model that prefix is run with.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006072define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006073; GENERIC-LABEL: test_pxor:
6074; GENERIC: # BB#0:
6075; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6076; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6077; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6078; GENERIC-NEXT: retq # sched: [1:1.00]
6079;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006080; HASWELL-LABEL: test_pxor:
6081; HASWELL: # BB#0:
6082; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +00006083; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006084; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00006085; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006086;
Gadi Haber767d98b2017-08-30 08:08:50 +00006087; SKYLAKE-LABEL: test_pxor:
6088; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006089; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6090; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6091; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6092; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006093;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006094; SKX-LABEL: test_pxor:
6095; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006096; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6097; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6098; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6099; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006100;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006101; ZNVER1-LABEL: test_pxor:
6102; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00006103; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6104; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6105; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006106; ZNVER1-NEXT: retq # sched: [1:0.50]
; The two xors are chained: the second consumes the result of the first.
; NOTE(review): the trailing integer add presumably keeps the xors in the
; integer execution domain so vpxor is selected - confirm.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006107 %1 = xor <4 x i64> %a0, %a1
6108 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6109 %3 = xor <4 x i64> %1, %2
6110 %4 = add <4 x i64> %3, %a1
6111 ret <4 x i64> %4
6112}
6113
; Metadata node !0 wrapping the single constant i32 1. None of the functions
; immediately above reference it; any users are earlier in the file.
6114!0 = !{i32 1}