blob: cec8ca94409d23331a65ee40a827a6e4be09df82 [file] [log] [blame]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
Simon Pilgrim84846982017-08-01 15:14:35 +00002; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
Simon Pilgrim946f08c2017-05-06 13:46:09 +00003; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
Gadi Haber85d99b42017-10-17 13:45:39 +00004; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
Gadi Haber767d98b2017-08-30 08:08:50 +00005; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
8
; Scheduling test for a 128-bit integer broadcast from memory. The IR loads a
; <4 x i32> from %a1, repeats it into both halves of an <8 x i32> with a
; shufflevector (mask 0,1,2,3,0,1,2,3), and adds %a0. The assertions below
; expect vbroadcasti128 followed by vpaddd, each with a per-CPU sched note.
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00009define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
10; GENERIC-LABEL: test_broadcasti128:
11; GENERIC: # BB#0:
12; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]
13; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
14; GENERIC-NEXT: retq # sched: [1:1.00]
15;
16; HASWELL-LABEL: test_broadcasti128:
17; HASWELL: # BB#0:
18; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50]
19; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
20; HASWELL-NEXT: retq # sched: [2:1.00]
21;
Gadi Haber85d99b42017-10-17 13:45:39 +000022; BROADWELL-LABEL: test_broadcasti128:
23; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +000024; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +000025; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +000026; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +000027;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000028; SKYLAKE-LABEL: test_broadcasti128:
29; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +000030; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
31; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
32; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000033;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000034; SKX-LABEL: test_broadcasti128:
35; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +000036; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
37; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
38; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000039;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000040; ZNVER1-LABEL: test_broadcasti128:
41; ZNVER1: # BB#0:
42; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50]
43; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
44; ZNVER1-NEXT: retq # sched: [1:0.50]
45 %1 = load <4 x i32>, <4 x i32> *%a1, align 16
46 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
47 %3 = add <8 x i32> %2, %a0
48 ret <8 x i32> %3
49}
50
; Scheduling test for a register-to-register double broadcast. The IR splats
; element 0 of the <2 x double> %a0 across a <4 x double> (zeroinitializer
; shuffle mask) and adds the result to itself. The assertions below expect
; vbroadcastsd from xmm0 into ymm0 followed by a 256-bit vaddpd.
Simon Pilgrim5a931c62017-09-12 11:17:01 +000051define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) {
52; GENERIC-LABEL: test_broadcastsd_ymm:
53; GENERIC: # BB#0:
54; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00]
55; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
56; GENERIC-NEXT: retq # sched: [1:1.00]
57;
58; HASWELL-LABEL: test_broadcastsd_ymm:
59; HASWELL: # BB#0:
60; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
61; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
62; HASWELL-NEXT: retq # sched: [2:1.00]
63;
Gadi Haber85d99b42017-10-17 13:45:39 +000064; BROADWELL-LABEL: test_broadcastsd_ymm:
65; BROADWELL: # BB#0:
66; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
67; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +000068; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +000069;
Simon Pilgrim5a931c62017-09-12 11:17:01 +000070; SKYLAKE-LABEL: test_broadcastsd_ymm:
71; SKYLAKE: # BB#0:
72; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +000073; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +000074; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +000075;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000076; SKX-LABEL: test_broadcastsd_ymm:
77; SKX: # BB#0:
78; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +000079; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
80; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000081;
Simon Pilgrim5a931c62017-09-12 11:17:01 +000082; ZNVER1-LABEL: test_broadcastsd_ymm:
83; ZNVER1: # BB#0:
84; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25]
85; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
86; ZNVER1-NEXT: retq # sched: [1:0.50]
87 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
88 %2 = fadd <4 x double> %1, %1
89 ret <4 x double> %2
90}
91
; Scheduling test for a 128-bit single-precision broadcast. The IR splats
; element 0 of the <4 x float> %a0 across all four lanes (zeroinitializer
; shuffle mask) and adds the result to itself. The assertions below expect
; vbroadcastss on xmm0 followed by a 128-bit vaddps.
92define <4 x float> @test_broadcastss(<4 x float> %a0) {
93; GENERIC-LABEL: test_broadcastss:
94; GENERIC: # BB#0:
95; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
96; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
97; GENERIC-NEXT: retq # sched: [1:1.00]
98;
99; HASWELL-LABEL: test_broadcastss:
100; HASWELL: # BB#0:
101; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
102; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
103; HASWELL-NEXT: retq # sched: [2:1.00]
104;
Gadi Haber85d99b42017-10-17 13:45:39 +0000105; BROADWELL-LABEL: test_broadcastss:
106; BROADWELL: # BB#0:
107; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
108; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000109; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000110;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000111; SKYLAKE-LABEL: test_broadcastss:
112; SKYLAKE: # BB#0:
113; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000114; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000115; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000116;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000117; SKX-LABEL: test_broadcastss:
118; SKX: # BB#0:
119; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000120; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
121; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000122;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000123; ZNVER1-LABEL: test_broadcastss:
124; ZNVER1: # BB#0:
125; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50]
126; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
127; ZNVER1-NEXT: retq # sched: [1:0.50]
128 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
129 %2 = fadd <4 x float> %1, %1
130 ret <4 x float> %2
131}
132
; Scheduling test for a 256-bit single-precision broadcast. The IR splats
; element 0 of the <4 x float> %a0 across an <8 x float> (zeroinitializer
; shuffle mask) and adds the result to itself. The assertions below expect
; vbroadcastss from xmm0 into ymm0 followed by a 256-bit vaddps.
133define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) {
134; GENERIC-LABEL: test_broadcastss_ymm:
135; GENERIC: # BB#0:
136; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00]
137; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
138; GENERIC-NEXT: retq # sched: [1:1.00]
139;
140; HASWELL-LABEL: test_broadcastss_ymm:
141; HASWELL: # BB#0:
142; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
143; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
144; HASWELL-NEXT: retq # sched: [2:1.00]
145;
Gadi Haber85d99b42017-10-17 13:45:39 +0000146; BROADWELL-LABEL: test_broadcastss_ymm:
147; BROADWELL: # BB#0:
148; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
149; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000150; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000151;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000152; SKYLAKE-LABEL: test_broadcastss_ymm:
153; SKYLAKE: # BB#0:
154; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000155; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000156; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000157;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000158; SKX-LABEL: test_broadcastss_ymm:
159; SKX: # BB#0:
160; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000161; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
162; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000163;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000164; ZNVER1-LABEL: test_broadcastss_ymm:
165; ZNVER1: # BB#0:
166; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25]
167; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
168; ZNVER1-NEXT: retq # sched: [1:0.50]
169 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
170 %2 = fadd <8 x float> %1, %1
171 ret <8 x float> %2
172}
173
; Scheduling test for 128-bit lane extraction in both register and store form.
; The IR computes %a0 + %a1 and %a0 - %a1, takes lanes 4-7 of each result,
; stores the upper half of the sum through %a2 and returns the upper half of
; the difference. The assertions below expect vpaddd and vpsubd, then two
; vextracti128 $1 instructions (one to xmm0, one to memory at (%rdi)), and a
; vzeroupper before the return.
174define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) {
175; GENERIC-LABEL: test_extracti128:
176; GENERIC: # BB#0:
177; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [3:1.00]
178; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
179; GENERIC-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [1:1.00]
180; GENERIC-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
181; GENERIC-NEXT: vzeroupper
182; GENERIC-NEXT: retq # sched: [1:1.00]
183;
184; HASWELL-LABEL: test_extracti128:
185; HASWELL: # BB#0:
186; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
187; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
188; HASWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
189; HASWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
190; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
191; HASWELL-NEXT: retq # sched: [2:1.00]
192;
Gadi Haber85d99b42017-10-17 13:45:39 +0000193; BROADWELL-LABEL: test_extracti128:
194; BROADWELL: # BB#0:
195; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
196; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
197; BROADWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
198; BROADWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
199; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000200; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000201;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000202; SKYLAKE-LABEL: test_extracti128:
203; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000204; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
205; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000206; SKYLAKE-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
207; SKYLAKE-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
208; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000209; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000210;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000211; SKX-LABEL: test_extracti128:
212; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000213; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
214; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000215; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
216; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
217; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000218; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000219;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000220; ZNVER1-LABEL: test_extracti128:
221; ZNVER1: # BB#0:
222; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.25]
223; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
224; ZNVER1-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [2:0.25]
225; ZNVER1-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:0.50]
226; ZNVER1-NEXT: vzeroupper # sched: [100:?]
227; ZNVER1-NEXT: retq # sched: [1:0.50]
228 %1 = add <8 x i32> %a0, %a1
229 %2 = sub <8 x i32> %a0, %a1
230 %3 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
231 %4 = shufflevector <8 x i32> %2, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
232 store <4 x i32> %3, <4 x i32> *%a2
233 ret <4 x i32> %4
234}
235
; Scheduling test for the 128-bit vgatherdpd gather. The IR forwards its four
; arguments to the llvm.x86.avx2.gather.d.pd intrinsic (declared right after
; the function) with a constant i8 2 as the last operand and returns the
; result. The assertions below expect a single vgatherdpd addressing
; (%rdi,%xmm1,2). The generic-CPU line carries no sched note, and the Haswell
; and znver1 notes show "?" in place of a throughput value.
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000236define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
237; GENERIC-LABEL: test_gatherdpd:
238; GENERIC: # BB#0:
239; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
240; GENERIC-NEXT: retq # sched: [1:1.00]
241;
242; HASWELL-LABEL: test_gatherdpd:
243; HASWELL: # BB#0:
244; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
245; HASWELL-NEXT: retq # sched: [2:1.00]
246;
Gadi Haber85d99b42017-10-17 13:45:39 +0000247; BROADWELL-LABEL: test_gatherdpd:
248; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000249; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
250; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000251;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000252; SKYLAKE-LABEL: test_gatherdpd:
253; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000254; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
255; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000256;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000257; SKX-LABEL: test_gatherdpd:
258; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000259; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
260; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000261;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000262; ZNVER1-LABEL: test_gatherdpd:
263; ZNVER1: # BB#0:
264; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
265; ZNVER1-NEXT: retq # sched: [1:0.50]
266 %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2)
267 ret <2 x double> %1
268}
269declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly
270
; Scheduling test for the 256-bit vgatherdpd gather. The IR forwards its four
; arguments to the llvm.x86.avx2.gather.d.pd.256 intrinsic (declared right
; after the function) with a constant i8 8 as the last operand and returns the
; result. The assertions below expect a single vgatherdpd addressing
; (%rdi,%xmm1,8) with ymm data and mask-position registers.
271define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
272; GENERIC-LABEL: test_gatherdpd_ymm:
273; GENERIC: # BB#0:
274; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0
275; GENERIC-NEXT: retq # sched: [1:1.00]
276;
277; HASWELL-LABEL: test_gatherdpd_ymm:
278; HASWELL: # BB#0:
279; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [1:?]
280; HASWELL-NEXT: retq # sched: [2:1.00]
281;
Gadi Haber85d99b42017-10-17 13:45:39 +0000282; BROADWELL-LABEL: test_gatherdpd_ymm:
283; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000284; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00]
285; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000286;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000287; SKYLAKE-LABEL: test_gatherdpd_ymm:
288; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000289; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
290; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000291;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000292; SKX-LABEL: test_gatherdpd_ymm:
293; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000294; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
295; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000296;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000297; ZNVER1-LABEL: test_gatherdpd_ymm:
298; ZNVER1: # BB#0:
299; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:?]
300; ZNVER1-NEXT: retq # sched: [1:0.50]
301 %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8)
302 ret <4 x double> %1
303}
304declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly
305
; Scheduling test for the 128-bit vgatherdps gather. The IR forwards its four
; arguments to the llvm.x86.avx2.gather.d.ps intrinsic (declared right after
; the function) with a constant i8 2 as the last operand and returns the
; result. The assertions below expect a single vgatherdps addressing
; (%rdi,%xmm1,2).
306define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
307; GENERIC-LABEL: test_gatherdps:
308; GENERIC: # BB#0:
309; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
310; GENERIC-NEXT: retq # sched: [1:1.00]
311;
312; HASWELL-LABEL: test_gatherdps:
313; HASWELL: # BB#0:
314; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
315; HASWELL-NEXT: retq # sched: [2:1.00]
316;
Gadi Haber85d99b42017-10-17 13:45:39 +0000317; BROADWELL-LABEL: test_gatherdps:
318; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000319; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
320; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000321;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000322; SKYLAKE-LABEL: test_gatherdps:
323; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000324; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
325; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000326;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000327; SKX-LABEL: test_gatherdps:
328; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000329; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
330; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000331;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000332; ZNVER1-LABEL: test_gatherdps:
333; ZNVER1: # BB#0:
334; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
335; ZNVER1-NEXT: retq # sched: [1:0.50]
336 %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2)
337 ret <4 x float> %1
338}
339declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly
340
; Scheduling test for the 256-bit vgatherdps gather. The IR forwards its four
; arguments to the llvm.x86.avx2.gather.d.ps.256 intrinsic (declared right
; after the function) with a constant i8 4 as the last operand and returns the
; result. The assertions below expect a single vgatherdps addressing
; (%rdi,%ymm1,4).
341define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
342; GENERIC-LABEL: test_gatherdps_ymm:
343; GENERIC: # BB#0:
344; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0
345; GENERIC-NEXT: retq # sched: [1:1.00]
346;
347; HASWELL-LABEL: test_gatherdps_ymm:
348; HASWELL: # BB#0:
349; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [1:?]
350; HASWELL-NEXT: retq # sched: [2:1.00]
351;
Gadi Haber85d99b42017-10-17 13:45:39 +0000352; BROADWELL-LABEL: test_gatherdps_ymm:
353; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000354; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00]
355; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000356;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000357; SKYLAKE-LABEL: test_gatherdps_ymm:
358; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000359; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
360; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000361;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000362; SKX-LABEL: test_gatherdps_ymm:
363; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000364; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
365; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000366;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000367; ZNVER1-LABEL: test_gatherdps_ymm:
368; ZNVER1: # BB#0:
369; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:?]
370; ZNVER1-NEXT: retq # sched: [1:0.50]
371 %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4)
372 ret <8 x float> %1
373}
374declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly
375
; Scheduling test for the 128-bit vgatherqpd gather (<2 x i64> index vector).
; The IR forwards its four arguments to the llvm.x86.avx2.gather.q.pd
; intrinsic (declared right after the function) with a constant i8 2 as the
; last operand and returns the result. The assertions below expect a single
; vgatherqpd addressing (%rdi,%xmm1,2).
376define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
377; GENERIC-LABEL: test_gatherqpd:
378; GENERIC: # BB#0:
379; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
380; GENERIC-NEXT: retq # sched: [1:1.00]
381;
382; HASWELL-LABEL: test_gatherqpd:
383; HASWELL: # BB#0:
384; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
385; HASWELL-NEXT: retq # sched: [2:1.00]
386;
Gadi Haber85d99b42017-10-17 13:45:39 +0000387; BROADWELL-LABEL: test_gatherqpd:
388; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000389; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00]
390; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000391;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000392; SKYLAKE-LABEL: test_gatherqpd:
393; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000394; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
395; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000396;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000397; SKX-LABEL: test_gatherqpd:
398; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000399; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
400; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000401;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000402; ZNVER1-LABEL: test_gatherqpd:
403; ZNVER1: # BB#0:
404; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
405; ZNVER1-NEXT: retq # sched: [1:0.50]
406 %1 = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2)
407 ret <2 x double> %1
408}
409declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly
410
; Scheduling test for the 256-bit vgatherqpd gather (<4 x i64> index vector).
; The IR forwards its four arguments to the llvm.x86.avx2.gather.q.pd.256
; intrinsic (declared right after the function) with a constant i8 8 as the
; last operand and returns the result. The assertions below expect a single
; vgatherqpd addressing (%rdi,%ymm1,8).
411define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
412; GENERIC-LABEL: test_gatherqpd_ymm:
413; GENERIC: # BB#0:
414; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0
415; GENERIC-NEXT: retq # sched: [1:1.00]
416;
417; HASWELL-LABEL: test_gatherqpd_ymm:
418; HASWELL: # BB#0:
419; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [1:?]
420; HASWELL-NEXT: retq # sched: [2:1.00]
421;
Gadi Haber85d99b42017-10-17 13:45:39 +0000422; BROADWELL-LABEL: test_gatherqpd_ymm:
423; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000424; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00]
425; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000426;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000427; SKYLAKE-LABEL: test_gatherqpd_ymm:
428; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000429; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
430; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000431;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000432; SKX-LABEL: test_gatherqpd_ymm:
433; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000434; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
435; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000436;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000437; ZNVER1-LABEL: test_gatherqpd_ymm:
438; ZNVER1: # BB#0:
439; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:?]
440; ZNVER1-NEXT: retq # sched: [1:0.50]
441 %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8)
442 ret <4 x double> %1
443}
444declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly
445
; Scheduling test for the 128-bit vgatherqps gather (<2 x i64> index vector,
; <4 x float> data). The IR forwards its four arguments to the
; llvm.x86.avx2.gather.q.ps intrinsic (declared right after the function) with
; a constant i8 2 as the last operand and returns the result. The assertions
; below expect a single vgatherqps addressing (%rdi,%xmm1,2).
446define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
447; GENERIC-LABEL: test_gatherqps:
448; GENERIC: # BB#0:
449; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
450; GENERIC-NEXT: retq # sched: [1:1.00]
451;
452; HASWELL-LABEL: test_gatherqps:
453; HASWELL: # BB#0:
454; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
455; HASWELL-NEXT: retq # sched: [2:1.00]
456;
Gadi Haber85d99b42017-10-17 13:45:39 +0000457; BROADWELL-LABEL: test_gatherqps:
458; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000459; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00]
460; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000461;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000462; SKYLAKE-LABEL: test_gatherqps:
463; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000464; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
465; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000466;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000467; SKX-LABEL: test_gatherqps:
468; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000469; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
470; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000471;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000472; ZNVER1-LABEL: test_gatherqps:
473; ZNVER1: # BB#0:
474; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
475; ZNVER1-NEXT: retq # sched: [1:0.50]
476 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2)
477 ret <4 x float> %1
478}
479declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly
480
; Scheduling test for vgatherqps with a 256-bit index vector (<4 x i64>) and
; 128-bit data (<4 x float>). The IR forwards its four arguments to the
; llvm.x86.avx2.gather.q.ps.256 intrinsic (declared right after the function)
; with a constant i8 4 as the last operand and returns the result. The
; assertions below expect vgatherqps addressing (%rdi,%ymm1,4) with xmm data
; registers, followed by a vzeroupper before the return.
481define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
482; GENERIC-LABEL: test_gatherqps_ymm:
483; GENERIC: # BB#0:
484; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0
485; GENERIC-NEXT: vzeroupper
486; GENERIC-NEXT: retq # sched: [1:1.00]
487;
488; HASWELL-LABEL: test_gatherqps_ymm:
489; HASWELL: # BB#0:
490; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [1:?]
491; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
492; HASWELL-NEXT: retq # sched: [2:1.00]
493;
Gadi Haber85d99b42017-10-17 13:45:39 +0000494; BROADWELL-LABEL: test_gatherqps_ymm:
495; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000496; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000497; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000498; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000499;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000500; SKYLAKE-LABEL: test_gatherqps_ymm:
501; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000502; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000503; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000504; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000505;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000506; SKX-LABEL: test_gatherqps_ymm:
507; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000508; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000509; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000510; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000511;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000512; ZNVER1-LABEL: test_gatherqps_ymm:
513; ZNVER1: # BB#0:
514; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:?]
515; ZNVER1-NEXT: vzeroupper # sched: [100:?]
516; ZNVER1-NEXT: retq # sched: [1:0.50]
517 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4)
518 ret <4 x float> %1
519}
520declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly
521
; Scheduling test for 128-bit lane insertion in both register and memory form.
; The IR widens %a1 and a <4 x i32> loaded from %a2 to eight lanes, uses each
; to replace lanes 4-7 of %a0 (shuffle indices 8-11 select the low four lanes
; of the widened operand), and adds the two results. The assertions below
; expect vinserti128 $1 with an xmm1 source, vinserti128 $1 with a (%rdi)
; source, and a vpaddd.
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000522define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
523; GENERIC-LABEL: test_inserti128:
524; GENERIC: # BB#0:
525; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
526; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
527; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
528; GENERIC-NEXT: retq # sched: [1:1.00]
529;
530; HASWELL-LABEL: test_inserti128:
531; HASWELL: # BB#0:
532; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
533; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
534; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
535; HASWELL-NEXT: retq # sched: [2:1.00]
536;
Gadi Haber85d99b42017-10-17 13:45:39 +0000537; BROADWELL-LABEL: test_inserti128:
538; BROADWELL: # BB#0:
539; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000540; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000541; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000542; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000543;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000544; SKYLAKE-LABEL: test_inserti128:
545; SKYLAKE: # BB#0:
546; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000547; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
548; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
549; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000550;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000551; SKX-LABEL: test_inserti128:
552; SKX: # BB#0:
553; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000554; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
555; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
556; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000557;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000558; ZNVER1-LABEL: test_inserti128:
559; ZNVER1: # BB#0:
560; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25]
561; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
562; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
563; ZNVER1-NEXT: retq # sched: [1:0.50]
564 %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
565 %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
566 %3 = load <4 x i32>, <4 x i32> *%a2, align 16
567 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
568 %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
569 %6 = add <8 x i32> %2, %5
570 ret <8 x i32> %6
571}
572
; test_movntdqa: scheduling test for the 256-bit non-temporal load.
; Calls the llvm.x86.avx2.movntdqa intrinsic on the pointer argument %a0 and
; returns the <4 x i64> result. For every RUN configuration the assertions
; expect one `vmovntdqa (%rdi), %ymm0` followed by `retq`, each carrying the
; scheduling annotation printed by -print-schedule.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000573define <4 x i64> @test_movntdqa(i8* %a0) {
574; GENERIC-LABEL: test_movntdqa:
575; GENERIC: # BB#0:
576; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [4:0.50]
577; GENERIC-NEXT: retq # sched: [1:1.00]
578;
579; HASWELL-LABEL: test_movntdqa:
580; HASWELL: # BB#0:
581; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50]
582; HASWELL-NEXT: retq # sched: [2:1.00]
583;
Gadi Haber85d99b42017-10-17 13:45:39 +0000584; BROADWELL-LABEL: test_movntdqa:
585; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000586; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50]
587; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000588;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000589; SKYLAKE-LABEL: test_movntdqa:
590; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000591; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
592; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000593;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000594; SKX-LABEL: test_movntdqa:
595; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000596; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
597; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000598;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000599; ZNVER1-LABEL: test_movntdqa:
600; ZNVER1: # BB#0:
601; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50]
602; ZNVER1-NEXT: retq # sched: [1:0.50]
603 %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0)
604 ret <4 x i64> %1
605}
; Intrinsic used by test_movntdqa; declared readonly (it reads through the pointer).
606declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
607
; test_mpsadbw: scheduling test for vmpsadbw (ymm) in register and memory form.
; The intrinsic llvm.x86.avx2.mpsadbw is called twice with immediate 7: first
; on the two register arguments, then on the first result and a vector loaded
; from %a2, which the assertions expect to be folded as a `(%rdi)` operand.
; NOTE(review): the ZNVER1 assertions carry the annotation [100:?], which looks
; like a placeholder for an instruction missing from that scheduling model.
; Confirm before relying on it.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000608define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
609; GENERIC-LABEL: test_mpsadbw:
610; GENERIC: # BB#0:
611; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
612; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
613; GENERIC-NEXT: retq # sched: [1:1.00]
614;
615; HASWELL-LABEL: test_mpsadbw:
616; HASWELL: # BB#0:
617; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
618; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
619; HASWELL-NEXT: retq # sched: [2:1.00]
620;
Gadi Haber85d99b42017-10-17 13:45:39 +0000621; BROADWELL-LABEL: test_mpsadbw:
622; BROADWELL: # BB#0:
623; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000624; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
625; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000626;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000627; SKYLAKE-LABEL: test_mpsadbw:
628; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000629; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000630; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
631; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000632;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000633; SKX-LABEL: test_mpsadbw:
634; SKX: # BB#0:
635; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000636; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
637; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000638;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000639; ZNVER1-LABEL: test_mpsadbw:
640; ZNVER1: # BB#0:
641; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:?]
642; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:?]
643; ZNVER1-NEXT: retq # sched: [1:0.50]
644 %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
; The intrinsic returns <16 x i16> but takes <32 x i8> operands, so the first
; result is bitcast before being fed back in as the first operand.
645 %2 = bitcast <16 x i16> %1 to <32 x i8>
646 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
647 %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7)
648 ret <16 x i16> %4
649}
; Intrinsic used by test_mpsadbw: two <32 x i8> operands plus an i8 immediate.
650declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
651
; test_pabsb: scheduling test for vpabsb (ymm) in register and memory form.
; llvm.x86.avx2.pabs.b is applied once to the register argument %a0 and once to
; a vector loaded from %a1; the two results are OR'd so both stay live.
; The assertions expect the load to be folded into vpabsb as `(%rdi)`. The
; ZNVER1 assertions expect the memory form to be emitted before the register
; form, the reverse of the other configurations.
; NOTE(review): the Haswell annotation for the memory form equals the register
; form ([1:0.50]), unlike the other models. This possibly reflects the model at
; the time rather than real load latency; confirm.
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000652define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000653; GENERIC-LABEL: test_pabsb:
654; GENERIC: # BB#0:
655; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00]
656; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:1.00]
657; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
658; GENERIC-NEXT: retq # sched: [1:1.00]
659;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000660; HASWELL-LABEL: test_pabsb:
661; HASWELL: # BB#0:
662; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000663; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000664; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000665; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000666;
Gadi Haber85d99b42017-10-17 13:45:39 +0000667; BROADWELL-LABEL: test_pabsb:
668; BROADWELL: # BB#0:
669; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000670; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000671; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +0000672; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000673;
Gadi Haber767d98b2017-08-30 08:08:50 +0000674; SKYLAKE-LABEL: test_pabsb:
675; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000676; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
677; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
678; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
679; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000680;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000681; SKX-LABEL: test_pabsb:
682; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000683; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
684; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
685; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
686; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000687;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000688; ZNVER1-LABEL: test_pabsb:
689; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +0000690; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
691; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
692; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000693; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000694 %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
695 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
696 %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
; Combine both results so neither vpabsb is dead.
697 %4 = or <32 x i8> %1, %3
698 ret <32 x i8> %4
699}
; Intrinsic used by test_pabsb.
700declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
701
; test_pabsd: scheduling test for vpabsd (ymm) in register and memory form.
; llvm.x86.avx2.pabs.d is applied once to the register argument %a0 and once to
; a vector loaded from %a1; the two results are OR'd so both stay live.
; The assertions expect the load to be folded into vpabsd as `(%rdi)`. The
; ZNVER1 assertions expect the memory form first, the other configurations
; expect the register form first.
702define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000703; GENERIC-LABEL: test_pabsd:
704; GENERIC: # BB#0:
705; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00]
706; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:1.00]
707; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
708; GENERIC-NEXT: retq # sched: [1:1.00]
709;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000710; HASWELL-LABEL: test_pabsd:
711; HASWELL: # BB#0:
712; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000713; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000714; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000715; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000716;
Gadi Haber85d99b42017-10-17 13:45:39 +0000717; BROADWELL-LABEL: test_pabsd:
718; BROADWELL: # BB#0:
719; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000720; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000721; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +0000722; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000723;
Gadi Haber767d98b2017-08-30 08:08:50 +0000724; SKYLAKE-LABEL: test_pabsd:
725; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000726; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
727; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
728; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
729; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000730;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000731; SKX-LABEL: test_pabsd:
732; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000733; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
734; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
735; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
736; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000737;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000738; ZNVER1-LABEL: test_pabsd:
739; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +0000740; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
741; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
742; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000743; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000744 %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
745 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
746 %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
; Combine both results so neither vpabsd is dead.
747 %4 = or <8 x i32> %1, %3
748 ret <8 x i32> %4
749}
; Intrinsic used by test_pabsd.
750declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
751
; test_pabsw: scheduling test for vpabsw (ymm) in register and memory form.
; llvm.x86.avx2.pabs.w is applied once to the register argument %a0 and once to
; a vector loaded from %a1; the two results are OR'd so both stay live.
; The assertions expect the load to be folded into vpabsw as `(%rdi)`. The
; ZNVER1 assertions expect the memory form first, the other configurations
; expect the register form first.
752define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000753; GENERIC-LABEL: test_pabsw:
754; GENERIC: # BB#0:
755; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00]
756; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:1.00]
757; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
758; GENERIC-NEXT: retq # sched: [1:1.00]
759;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000760; HASWELL-LABEL: test_pabsw:
761; HASWELL: # BB#0:
762; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000763; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000764; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000765; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000766;
Gadi Haber85d99b42017-10-17 13:45:39 +0000767; BROADWELL-LABEL: test_pabsw:
768; BROADWELL: # BB#0:
769; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000770; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000771; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +0000772; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000773;
Gadi Haber767d98b2017-08-30 08:08:50 +0000774; SKYLAKE-LABEL: test_pabsw:
775; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000776; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
777; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
778; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
779; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000780;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000781; SKX-LABEL: test_pabsw:
782; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +0000783; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
784; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
785; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
786; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000787;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000788; ZNVER1-LABEL: test_pabsw:
789; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +0000790; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
791; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
792; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000793; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000794 %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
795 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
796 %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
; Combine both results so neither vpabsw is dead.
797 %4 = or <16 x i16> %1, %3
798 ret <16 x i16> %4
799}
; Intrinsic used by test_pabsw.
800declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
801
; test_packssdw: scheduling test for vpackssdw (ymm) in register and memory form.
; llvm.x86.avx2.packssdw is called on the two register arguments, then again on
; that result and a vector loaded from %a2, which the assertions expect to be
; folded as a `(%rdi)` operand.
; NOTE(review): the Haswell annotation for the memory form equals the register
; form ([1:1.00]), unlike the other models. Confirm whether that is intended.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000802define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
803; GENERIC-LABEL: test_packssdw:
804; GENERIC: # BB#0:
805; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
806; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
807; GENERIC-NEXT: retq # sched: [1:1.00]
808;
809; HASWELL-LABEL: test_packssdw:
810; HASWELL: # BB#0:
811; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
812; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
813; HASWELL-NEXT: retq # sched: [2:1.00]
814;
Gadi Haber85d99b42017-10-17 13:45:39 +0000815; BROADWELL-LABEL: test_packssdw:
816; BROADWELL: # BB#0:
817; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000818; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
819; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000820;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000821; SKYLAKE-LABEL: test_packssdw:
822; SKYLAKE: # BB#0:
823; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000824; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
825; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000826;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000827; SKX-LABEL: test_packssdw:
828; SKX: # BB#0:
829; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000830; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
831; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000832;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000833; ZNVER1-LABEL: test_packssdw:
834; ZNVER1: # BB#0:
835; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
836; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
837; ZNVER1-NEXT: retq # sched: [1:0.50]
838 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
; The pack result is <16 x i16>; bitcast it back to the <8 x i32> operand type
; so it can feed the second call.
839 %2 = bitcast <16 x i16> %1 to <8 x i32>
840 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
841 %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3)
842 ret <16 x i16> %4
843}
; Intrinsic used by test_packssdw.
844declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
845
; test_packsswb: scheduling test for vpacksswb (ymm) in register and memory form.
; llvm.x86.avx2.packsswb is called on the two register arguments, then again on
; that result and a vector loaded from %a2, which the assertions expect to be
; folded as a `(%rdi)` operand.
846define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
847; GENERIC-LABEL: test_packsswb:
848; GENERIC: # BB#0:
849; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
850; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
851; GENERIC-NEXT: retq # sched: [1:1.00]
852;
853; HASWELL-LABEL: test_packsswb:
854; HASWELL: # BB#0:
855; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
856; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
857; HASWELL-NEXT: retq # sched: [2:1.00]
858;
Gadi Haber85d99b42017-10-17 13:45:39 +0000859; BROADWELL-LABEL: test_packsswb:
860; BROADWELL: # BB#0:
861; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000862; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
863; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000864;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000865; SKYLAKE-LABEL: test_packsswb:
866; SKYLAKE: # BB#0:
867; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000868; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
869; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000870;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000871; SKX-LABEL: test_packsswb:
872; SKX: # BB#0:
873; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000874; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
875; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000876;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000877; ZNVER1-LABEL: test_packsswb:
878; ZNVER1: # BB#0:
879; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
880; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
881; ZNVER1-NEXT: retq # sched: [1:0.50]
882 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
; The pack result is <32 x i8>; bitcast it back to the <16 x i16> operand type
; so it can feed the second call.
883 %2 = bitcast <32 x i8> %1 to <16 x i16>
884 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
885 %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3)
886 ret <32 x i8> %4
887}
; Intrinsic used by test_packsswb.
888declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
889
; test_packusdw: scheduling test for vpackusdw (ymm) in register and memory form.
; llvm.x86.avx2.packusdw is called on the two register arguments, then again on
; that result and a vector loaded from %a2, which the assertions expect to be
; folded as a `(%rdi)` operand.
890define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
891; GENERIC-LABEL: test_packusdw:
892; GENERIC: # BB#0:
893; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
894; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
895; GENERIC-NEXT: retq # sched: [1:1.00]
896;
897; HASWELL-LABEL: test_packusdw:
898; HASWELL: # BB#0:
899; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
900; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
901; HASWELL-NEXT: retq # sched: [2:1.00]
902;
Gadi Haber85d99b42017-10-17 13:45:39 +0000903; BROADWELL-LABEL: test_packusdw:
904; BROADWELL: # BB#0:
905; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000906; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
907; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000908;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000909; SKYLAKE-LABEL: test_packusdw:
910; SKYLAKE: # BB#0:
911; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000912; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
913; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000914;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000915; SKX-LABEL: test_packusdw:
916; SKX: # BB#0:
917; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000918; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
919; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000920;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000921; ZNVER1-LABEL: test_packusdw:
922; ZNVER1: # BB#0:
923; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
924; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
925; ZNVER1-NEXT: retq # sched: [1:0.50]
926 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
; The pack result is <16 x i16>; bitcast it back to the <8 x i32> operand type
; so it can feed the second call.
927 %2 = bitcast <16 x i16> %1 to <8 x i32>
928 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
929 %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3)
930 ret <16 x i16> %4
931}
; Intrinsic used by test_packusdw.
932declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
933
; test_packuswb: scheduling test for vpackuswb (ymm) in register and memory form.
; llvm.x86.avx2.packuswb is called on the two register arguments, then again on
; that result and a vector loaded from %a2, which the assertions expect to be
; folded as a `(%rdi)` operand.
934define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
935; GENERIC-LABEL: test_packuswb:
936; GENERIC: # BB#0:
937; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
938; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
939; GENERIC-NEXT: retq # sched: [1:1.00]
940;
941; HASWELL-LABEL: test_packuswb:
942; HASWELL: # BB#0:
943; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
944; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
945; HASWELL-NEXT: retq # sched: [2:1.00]
946;
Gadi Haber85d99b42017-10-17 13:45:39 +0000947; BROADWELL-LABEL: test_packuswb:
948; BROADWELL: # BB#0:
949; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000950; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
951; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000952;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000953; SKYLAKE-LABEL: test_packuswb:
954; SKYLAKE: # BB#0:
955; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000956; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
957; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000958;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000959; SKX-LABEL: test_packuswb:
960; SKX: # BB#0:
961; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000962; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
963; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000964;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000965; ZNVER1-LABEL: test_packuswb:
966; ZNVER1: # BB#0:
967; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
968; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
969; ZNVER1-NEXT: retq # sched: [1:0.50]
970 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
; The pack result is <32 x i8>; bitcast it back to the <16 x i16> operand type
; so it can feed the second call.
971 %2 = bitcast <32 x i8> %1 to <16 x i16>
972 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
973 %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3)
974 ret <32 x i8> %4
975}
; Intrinsic used by test_packuswb.
976declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
977
; test_paddb: scheduling test for vpaddb (ymm) in register and memory form.
; Uses plain IR `add` on <32 x i8> (no intrinsic): first %a0 + %a1, then that
; sum plus a vector loaded from %a2, which the assertions expect to be folded
; into the second vpaddb as a `(%rdi)` operand.
; NOTE(review): the Haswell annotation for the memory form equals the register
; form ([1:0.50]), unlike the other models. Confirm whether that is intended.
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000978define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000979; GENERIC-LABEL: test_paddb:
980; GENERIC: # BB#0:
981; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
982; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
983; GENERIC-NEXT: retq # sched: [1:1.00]
984;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000985; HASWELL-LABEL: test_paddb:
986; HASWELL: # BB#0:
987; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +0000988; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
989; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000990;
Gadi Haber85d99b42017-10-17 13:45:39 +0000991; BROADWELL-LABEL: test_paddb:
992; BROADWELL: # BB#0:
993; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000994; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
995; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000996;
Gadi Haber767d98b2017-08-30 08:08:50 +0000997; SKYLAKE-LABEL: test_paddb:
998; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000999; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1000; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1001; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001002;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001003; SKX-LABEL: test_paddb:
1004; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001005; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1006; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1007; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001008;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001009; ZNVER1-LABEL: test_paddb:
1010; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00001011; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1012; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001013; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001014 %1 = add <32 x i8> %a0, %a1
1015 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
; Chain the loaded value onto the first sum so both adds are needed.
1016 %3 = add <32 x i8> %1, %2
1017 ret <32 x i8> %3
1018}
1019
; test_paddd: scheduling test for vpaddd (ymm) in register and memory form.
; Uses plain IR `add` on <8 x i32> (no intrinsic): first %a0 + %a1, then that
; sum plus a vector loaded from %a2, which the assertions expect to be folded
; into the second vpaddd as a `(%rdi)` operand.
1020define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001021; GENERIC-LABEL: test_paddd:
1022; GENERIC: # BB#0:
1023; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1024; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1025; GENERIC-NEXT: retq # sched: [1:1.00]
1026;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001027; HASWELL-LABEL: test_paddd:
1028; HASWELL: # BB#0:
1029; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001030; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1031; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001032;
Gadi Haber85d99b42017-10-17 13:45:39 +00001033; BROADWELL-LABEL: test_paddd:
1034; BROADWELL: # BB#0:
1035; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001036; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1037; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001038;
Gadi Haber767d98b2017-08-30 08:08:50 +00001039; SKYLAKE-LABEL: test_paddd:
1040; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001041; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1042; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1043; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001044;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001045; SKX-LABEL: test_paddd:
1046; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001047; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1048; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1049; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001050;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001051; ZNVER1-LABEL: test_paddd:
1052; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00001053; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1054; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001055; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001056 %1 = add <8 x i32> %a0, %a1
1057 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
; Chain the loaded value onto the first sum so both adds are needed.
1058 %3 = add <8 x i32> %1, %2
1059 ret <8 x i32> %3
1060}
1061
; test_paddq: scheduling test for vpaddq (ymm) in register and memory form.
; Uses plain IR `add` on <4 x i64> (no intrinsic): first %a0 + %a1, then that
; sum plus a vector loaded from %a2, which the assertions expect to be folded
; into the second vpaddq as a `(%rdi)` operand.
1062define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001063; GENERIC-LABEL: test_paddq:
1064; GENERIC: # BB#0:
1065; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1066; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1067; GENERIC-NEXT: retq # sched: [1:1.00]
1068;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001069; HASWELL-LABEL: test_paddq:
1070; HASWELL: # BB#0:
1071; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001072; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1073; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001074;
Gadi Haber85d99b42017-10-17 13:45:39 +00001075; BROADWELL-LABEL: test_paddq:
1076; BROADWELL: # BB#0:
1077; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001078; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1079; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001080;
Gadi Haber767d98b2017-08-30 08:08:50 +00001081; SKYLAKE-LABEL: test_paddq:
1082; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001083; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1084; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1085; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001086;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001087; SKX-LABEL: test_paddq:
1088; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001089; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1090; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1091; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001092;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001093; ZNVER1-LABEL: test_paddq:
1094; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00001095; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1096; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001097; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001098 %1 = add <4 x i64> %a0, %a1
1099 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
; Chain the loaded value onto the first sum so both adds are needed.
1100 %3 = add <4 x i64> %1, %2
1101 ret <4 x i64> %3
1102}
1103
; test_paddsb: scheduling test for vpaddsb (ymm) in register and memory form.
; llvm.x86.avx2.padds.b is called on the two register arguments, then again on
; that result and a vector loaded from %a2, which the assertions expect to be
; folded into the second vpaddsb as a `(%rdi)` operand.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001104define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1105; GENERIC-LABEL: test_paddsb:
1106; GENERIC: # BB#0:
1107; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1108; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1109; GENERIC-NEXT: retq # sched: [1:1.00]
1110;
1111; HASWELL-LABEL: test_paddsb:
1112; HASWELL: # BB#0:
1113; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1114; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1115; HASWELL-NEXT: retq # sched: [2:1.00]
1116;
Gadi Haber85d99b42017-10-17 13:45:39 +00001117; BROADWELL-LABEL: test_paddsb:
1118; BROADWELL: # BB#0:
1119; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001120; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1121; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001122;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001123; SKYLAKE-LABEL: test_paddsb:
1124; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001125; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1126; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1127; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001128;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001129; SKX-LABEL: test_paddsb:
1130; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001131; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1132; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1133; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001134;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001135; ZNVER1-LABEL: test_paddsb:
1136; ZNVER1: # BB#0:
1137; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1138; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1139; ZNVER1-NEXT: retq # sched: [1:0.50]
1140 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1)
1141 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
; Feed the first result back in so the second call depends on the first.
1142 %3 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %1, <32 x i8> %2)
1143 ret <32 x i8> %3
1144}
; Intrinsic used by test_paddsb.
1145declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
1146
; test_paddsw: scheduling test for vpaddsw (ymm) in register and memory form.
; llvm.x86.avx2.padds.w is called on the two register arguments, then again on
; that result and a vector loaded from %a2, which the assertions expect to be
; folded into the second vpaddsw as a `(%rdi)` operand.
1147define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1148; GENERIC-LABEL: test_paddsw:
1149; GENERIC: # BB#0:
1150; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1151; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1152; GENERIC-NEXT: retq # sched: [1:1.00]
1153;
1154; HASWELL-LABEL: test_paddsw:
1155; HASWELL: # BB#0:
1156; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1157; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1158; HASWELL-NEXT: retq # sched: [2:1.00]
1159;
Gadi Haber85d99b42017-10-17 13:45:39 +00001160; BROADWELL-LABEL: test_paddsw:
1161; BROADWELL: # BB#0:
1162; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001163; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1164; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001165;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001166; SKYLAKE-LABEL: test_paddsw:
1167; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001168; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1169; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1170; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001171;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001172; SKX-LABEL: test_paddsw:
1173; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001174; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1175; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1176; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001177;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001178; ZNVER1-LABEL: test_paddsw:
1179; ZNVER1: # BB#0:
1180; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1181; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1182; ZNVER1-NEXT: retq # sched: [1:0.50]
1183 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1)
1184 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
; Feed the first result back in so the second call depends on the first.
1185 %3 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %1, <16 x i16> %2)
1186 ret <16 x i16> %3
1187}
; Intrinsic used by test_paddsw.
1188declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
1189
; test_paddusb: calls @llvm.x86.avx2.paddus.b on %a0/%a1, then calls it again
; on that result and a <32 x i8> loaded from %a2. For each CPU configuration
; the check lines expect a register-register vpaddusb followed by a vpaddusb
; whose source is the (%rdi) memory operand, each carrying the "sched"
; annotation emitted under -print-schedule.
1190define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1191; GENERIC-LABEL: test_paddusb:
1192; GENERIC: # BB#0:
1193; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1194; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1195; GENERIC-NEXT: retq # sched: [1:1.00]
1196;
1197; HASWELL-LABEL: test_paddusb:
1198; HASWELL: # BB#0:
1199; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1200; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1201; HASWELL-NEXT: retq # sched: [2:1.00]
1202;
Gadi Haber85d99b42017-10-17 13:45:39 +00001203; BROADWELL-LABEL: test_paddusb:
1204; BROADWELL: # BB#0:
1205; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001206; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1207; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001208;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001209; SKYLAKE-LABEL: test_paddusb:
1210; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001211; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1212; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1213; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001214;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001215; SKX-LABEL: test_paddusb:
1216; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001217; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1218; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1219; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001220;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001221; ZNVER1-LABEL: test_paddusb:
1222; ZNVER1: # BB#0:
1223; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1224; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1225; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 combines the two vector arguments; %3 feeds %1 and the
; 32-byte-aligned load %2 through the same intrinsic.
1226 %1 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1)
1227 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1228 %3 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %1, <32 x i8> %2)
1229 ret <32 x i8> %3
1230}
; Declaration of the intrinsic called by test_paddusb.
1231declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
1232
; test_paddusw: calls @llvm.x86.avx2.paddus.w on %a0/%a1, then calls it again
; on that result and a <16 x i16> loaded from %a2. For each CPU configuration
; the check lines expect a register-register vpaddusw followed by a vpaddusw
; whose source is the (%rdi) memory operand, each carrying the "sched"
; annotation emitted under -print-schedule.
1233define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1234; GENERIC-LABEL: test_paddusw:
1235; GENERIC: # BB#0:
1236; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1237; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1238; GENERIC-NEXT: retq # sched: [1:1.00]
1239;
1240; HASWELL-LABEL: test_paddusw:
1241; HASWELL: # BB#0:
1242; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1243; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1244; HASWELL-NEXT: retq # sched: [2:1.00]
1245;
Gadi Haber85d99b42017-10-17 13:45:39 +00001246; BROADWELL-LABEL: test_paddusw:
1247; BROADWELL: # BB#0:
1248; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001249; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1250; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001251;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001252; SKYLAKE-LABEL: test_paddusw:
1253; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001254; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1255; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1256; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001257;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001258; SKX-LABEL: test_paddusw:
1259; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001260; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1261; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1262; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001263;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001264; ZNVER1-LABEL: test_paddusw:
1265; ZNVER1: # BB#0:
1266; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1267; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1268; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 combines the two vector arguments; %3 feeds %1 and the
; 32-byte-aligned load %2 through the same intrinsic.
1269 %1 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1)
1270 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1271 %3 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %1, <16 x i16> %2)
1272 ret <16 x i16> %3
1273}
; Declaration of the intrinsic called by test_paddusw.
1274declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
1275
; test_paddw: uses the generic IR `add` on <16 x i16> (no intrinsic): first on
; %a0/%a1, then on that sum and a vector loaded from %a2. For each CPU
; configuration the check lines expect a register-register vpaddw followed by
; a vpaddw whose source is the (%rdi) memory operand, each carrying the
; "sched" annotation emitted under -print-schedule.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001276define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001277; GENERIC-LABEL: test_paddw:
1278; GENERIC: # BB#0:
1279; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1280; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1281; GENERIC-NEXT: retq # sched: [1:1.00]
1282;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001283; HASWELL-LABEL: test_paddw:
1284; HASWELL: # BB#0:
1285; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001286; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1287; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001288;
Gadi Haber85d99b42017-10-17 13:45:39 +00001289; BROADWELL-LABEL: test_paddw:
1290; BROADWELL: # BB#0:
1291; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001292; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1293; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001294;
Gadi Haber767d98b2017-08-30 08:08:50 +00001295; SKYLAKE-LABEL: test_paddw:
1296; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001297; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1298; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1299; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001300;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001301; SKX-LABEL: test_paddw:
1302; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001303; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1304; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1305; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001306;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001307; ZNVER1-LABEL: test_paddw:
1308; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00001309; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1310; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001311; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 adds the two vector arguments; %3 adds %1 and the
; 32-byte-aligned load %2.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001312 %1 = add <16 x i16> %a0, %a1
1313 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1314 %3 = add <16 x i16> %1, %2
1315 ret <16 x i16> %3
1316}
1317
; test_palignr: two shufflevector operations with the same 32-entry mask
; (indices 1..15, 32, 17..31, 48). The first shuffles %a1 with %a0; the second
; shuffles a <32 x i8> loaded from %a2 with the first result. For each CPU
; configuration the check lines expect two vpalignr instructions, the second
; taking its first source from memory, each carrying the "sched" annotation
; emitted under -print-schedule.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001318define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1319; GENERIC-LABEL: test_palignr:
1320; GENERIC: # BB#0:
1321; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1322; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [5:1.00]
1323; GENERIC-NEXT: retq # sched: [1:1.00]
1324;
1325; HASWELL-LABEL: test_palignr:
1326; HASWELL: # BB#0:
1327; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1328; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1329; HASWELL-NEXT: retq # sched: [2:1.00]
1330;
Gadi Haber85d99b42017-10-17 13:45:39 +00001331; BROADWELL-LABEL: test_palignr:
1332; BROADWELL: # BB#0:
1333; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001334; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [7:1.00]
1335; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001336;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001337; SKYLAKE-LABEL: test_palignr:
1338; SKYLAKE: # BB#0:
1339; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001340; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00]
1341; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001342;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001343; SKX-LABEL: test_palignr:
1344; SKX: # BB#0:
1345; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001346; SKX-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00]
1347; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001348;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001349; ZNVER1-LABEL: test_palignr:
1350; ZNVER1: # BB#0:
1351; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25]
1352; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:0.50]
1353; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: in each mask, indices 0..31 select from the first shuffle
; operand and 32..63 from the second, so every 16-byte half takes bytes 1..15
; of the first operand followed by byte 0 of the second operand's same half.
1354 %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
1355 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1356 %3 = shufflevector <32 x i8> %2, <32 x i8> %1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
1357 ret <32 x i8> %3
1358}
1359
; test_pand: bitwise `and` of %a0/%a1, then `and` of that result with a
; <4 x i64> loaded from %a2, then an `add` of %a1. For each CPU configuration
; the check lines expect a register-register vpand, a vpand whose source is
; the (%rdi) memory operand, and a vpaddq, each carrying the "sched"
; annotation emitted under -print-schedule.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001360define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001361; GENERIC-LABEL: test_pand:
1362; GENERIC: # BB#0:
1363; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1364; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
1365; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1366; GENERIC-NEXT: retq # sched: [1:1.00]
1367;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001368; HASWELL-LABEL: test_pand:
1369; HASWELL: # BB#0:
1370; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001371; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001372; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001373; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001374;
Gadi Haber85d99b42017-10-17 13:45:39 +00001375; BROADWELL-LABEL: test_pand:
1376; BROADWELL: # BB#0:
1377; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001378; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001379; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001380; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001381;
Gadi Haber767d98b2017-08-30 08:08:50 +00001382; SKYLAKE-LABEL: test_pand:
1383; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001384; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1385; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1386; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1387; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001388;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001389; SKX-LABEL: test_pand:
1390; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001391; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1392; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1393; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1394; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001395;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001396; ZNVER1-LABEL: test_pand:
1397; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00001398; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1399; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1400; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001401; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test.
; NOTE(review): the trailing add (%4) presumably keeps the logic ops in the
; integer execution domain so vpand is selected rather than a floating-point
; AND - confirm before removing it.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001402 %1 = and <4 x i64> %a0, %a1
1403 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
1404 %3 = and <4 x i64> %1, %2
1405 %4 = add <4 x i64> %3, %a1
1406 ret <4 x i64> %4
1407}
1408
; test_pandn: builds two and-not patterns out of `xor ..., -1` followed by
; `and`: first (~%a0 & %a1), then (~that & value loaded from %a2), and finally
; adds the two and-not results. For each CPU configuration the check lines
; expect a register-register vpandn, a vpandn whose source is the (%rdi)
; memory operand, and a vpaddq, each carrying the "sched" annotation emitted
; under -print-schedule.
1409define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001410; GENERIC-LABEL: test_pandn:
1411; GENERIC: # BB#0:
1412; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1413; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:1.00]
1414; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1415; GENERIC-NEXT: retq # sched: [1:1.00]
1416;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001417; HASWELL-LABEL: test_pandn:
1418; HASWELL: # BB#0:
1419; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001420; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001421; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00001422; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001423;
Gadi Haber85d99b42017-10-17 13:45:39 +00001424; BROADWELL-LABEL: test_pandn:
1425; BROADWELL: # BB#0:
1426; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001427; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001428; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001429; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001430;
Gadi Haber767d98b2017-08-30 08:08:50 +00001431; SKYLAKE-LABEL: test_pandn:
1432; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001433; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1434; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1435; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1436; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001437;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001438; SKX-LABEL: test_pandn:
1439; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001440; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1441; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1442; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1443; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001444;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001445; ZNVER1-LABEL: test_pandn:
1446; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00001447; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1448; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1449; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001450; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %2 = ~%a0 & %a1; %5 = ~%2 & load; %6 = %2 + %5.
; NOTE(review): the trailing add presumably keeps the logic ops in the
; integer execution domain so vpandn is selected - confirm before removing it.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001451 %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
1452 %2 = and <4 x i64> %a1, %1
1453 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
1454 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
1455 %5 = and <4 x i64> %3, %4
1456 %6 = add <4 x i64> %2, %5
1457 ret <4 x i64> %6
1458}
1459
; test_pavgb: expresses a rounding byte average in generic IR (no intrinsic):
; zero-extend both <32 x i8> inputs to i16, add them, add 1, shift right by 1,
; truncate back to i8. The sequence is applied to %a0/%a1 and then to that
; result and a vector loaded from %a2. For each CPU configuration the check
; lines expect each sequence to become a single vpavgb (register-register,
; then with the (%rdi) memory operand), each carrying the "sched" annotation
; emitted under -print-schedule.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001460define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1461; GENERIC-LABEL: test_pavgb:
1462; GENERIC: # BB#0:
1463; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1464; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1465; GENERIC-NEXT: retq # sched: [1:1.00]
1466;
1467; HASWELL-LABEL: test_pavgb:
1468; HASWELL: # BB#0:
1469; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1470; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1471; HASWELL-NEXT: retq # sched: [2:1.00]
1472;
Gadi Haber85d99b42017-10-17 13:45:39 +00001473; BROADWELL-LABEL: test_pavgb:
1474; BROADWELL: # BB#0:
1475; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001476; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1477; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001478;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001479; SKYLAKE-LABEL: test_pavgb:
1480; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001481; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1482; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1483; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001484;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001485; SKX-LABEL: test_pavgb:
1486; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001487; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1488; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1489; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001490;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001491; ZNVER1-LABEL: test_pavgb:
1492; ZNVER1: # BB#0:
1493; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1494; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1495; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1..%6 average %a0 and %a1; %8..%13 average that result with
; the 32-byte-aligned load %7. The widening to i16 keeps the +1 and the sum
; from wrapping before the shift.
1496 %1 = zext <32 x i8> %a0 to <32 x i16>
1497 %2 = zext <32 x i8> %a1 to <32 x i16>
1498 %3 = add <32 x i16> %1, %2
1499 %4 = add <32 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1500 %5 = lshr <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1501 %6 = trunc <32 x i16> %5 to <32 x i8>
1502 %7 = load <32 x i8>, <32 x i8> *%a2, align 32
1503 %8 = zext <32 x i8> %6 to <32 x i16>
1504 %9 = zext <32 x i8> %7 to <32 x i16>
1505 %10 = add <32 x i16> %8, %9
1506 %11 = add <32 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1507 %12 = lshr <32 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1508 %13 = trunc <32 x i16> %12 to <32 x i8>
1509 ret <32 x i8> %13
1510}
1511
; test_pavgw: expresses a rounding word average in generic IR (no intrinsic):
; zero-extend both <16 x i16> inputs to i32, add them, add 1, shift right by
; 1, truncate back to i16. The sequence is applied to %a0/%a1 and then to that
; result and a vector loaded from %a2. For each CPU configuration the check
; lines expect each sequence to become a single vpavgw (register-register,
; then with the (%rdi) memory operand), each carrying the "sched" annotation
; emitted under -print-schedule.
1512define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1513; GENERIC-LABEL: test_pavgw:
1514; GENERIC: # BB#0:
1515; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1516; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1517; GENERIC-NEXT: retq # sched: [1:1.00]
1518;
1519; HASWELL-LABEL: test_pavgw:
1520; HASWELL: # BB#0:
1521; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1522; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1523; HASWELL-NEXT: retq # sched: [2:1.00]
1524;
Gadi Haber85d99b42017-10-17 13:45:39 +00001525; BROADWELL-LABEL: test_pavgw:
1526; BROADWELL: # BB#0:
1527; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001528; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1529; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001530;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001531; SKYLAKE-LABEL: test_pavgw:
1532; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001533; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1534; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1535; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001536;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001537; SKX-LABEL: test_pavgw:
1538; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001539; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1540; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1541; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001542;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001543; ZNVER1-LABEL: test_pavgw:
1544; ZNVER1: # BB#0:
1545; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1546; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1547; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1..%6 average %a0 and %a1; %8..%13 average that result with
; the 32-byte-aligned load %7. The widening to i32 keeps the +1 and the sum
; from wrapping before the shift.
1548 %1 = zext <16 x i16> %a0 to <16 x i32>
1549 %2 = zext <16 x i16> %a1 to <16 x i32>
1550 %3 = add <16 x i32> %1, %2
1551 %4 = add <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1552 %5 = lshr <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1553 %6 = trunc <16 x i32> %5 to <16 x i16>
1554 %7 = load <16 x i16>, <16 x i16> *%a2, align 32
1555 %8 = zext <16 x i16> %6 to <16 x i32>
1556 %9 = zext <16 x i16> %7 to <16 x i32>
1557 %10 = add <16 x i32> %8, %9
1558 %11 = add <16 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1559 %12 = lshr <16 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1560 %13 = trunc <16 x i32> %12 to <16 x i16>
1561 ret <16 x i16> %13
1562}
1563
; test_pblendd: 128-bit variant. Two lane-preserving shufflevector blends on
; <4 x i32> (masks <4,5,6,3> and <4,1,6,3>), the second using a vector loaded
; from %a2, followed by an `add` with %a0. For each CPU configuration the
; check lines expect two vpblendd instructions on xmm registers (the second
; with a memory source) and a vpaddd, each carrying the "sched" annotation
; emitted under -print-schedule.
1564define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1565; GENERIC-LABEL: test_pblendd:
1566; GENERIC: # BB#0:
1567; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1568; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [5:0.50]
1569; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1570; GENERIC-NEXT: retq # sched: [1:1.00]
1571;
1572; HASWELL-LABEL: test_pblendd:
1573; HASWELL: # BB#0:
1574; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1575; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50]
1576; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1577; HASWELL-NEXT: retq # sched: [2:1.00]
1578;
Gadi Haber85d99b42017-10-17 13:45:39 +00001579; BROADWELL-LABEL: test_pblendd:
1580; BROADWELL: # BB#0:
1581; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001582; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001583; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001584; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001585;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001586; SKYLAKE-LABEL: test_pblendd:
1587; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001588; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1589; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1590; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1591; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001592;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001593; SKX-LABEL: test_pblendd:
1594; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001595; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1596; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1597; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1598; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001599;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001600; ZNVER1-LABEL: test_pblendd:
1601; ZNVER1: # BB#0:
1602; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1603; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00]
1604; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1605; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: in each mask, indices 0..3 pick from the first operand and
; 4..7 from the second, always at the same lane position. The load is only
; 16-byte aligned because the vector is 128 bits wide.
; NOTE(review): the trailing add presumably keeps the blends in the integer
; execution domain so vpblendd is selected - confirm before removing it.
1606 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
1607 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
1608 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1609 %4 = add <4 x i32> %a0, %3
1610 ret <4 x i32> %4
1611}
1612
; test_pblendd_ymm: 256-bit variant. Two lane-preserving shufflevector blends
; on <8 x i32> (masks <8,9,10,3,4,5,6,15> and <0,9,10,3,4,5,6,7>), the second
; using a vector loaded from %a2, followed by an `add` with %a0. For each CPU
; configuration the check lines expect two vpblendd instructions on ymm
; registers (the second with a memory source) and a vpaddd, each carrying the
; "sched" annotation emitted under -print-schedule.
1613define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
1614; GENERIC-LABEL: test_pblendd_ymm:
1615; GENERIC: # BB#0:
1616; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1617; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [5:0.50]
1618; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1619; GENERIC-NEXT: retq # sched: [1:1.00]
1620;
1621; HASWELL-LABEL: test_pblendd_ymm:
1622; HASWELL: # BB#0:
1623; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1624; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50]
1625; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1626; HASWELL-NEXT: retq # sched: [2:1.00]
1627;
Gadi Haber85d99b42017-10-17 13:45:39 +00001628; BROADWELL-LABEL: test_pblendd_ymm:
1629; BROADWELL: # BB#0:
1630; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001631; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001632; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001633; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001634;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001635; SKYLAKE-LABEL: test_pblendd_ymm:
1636; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001637; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1638; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1639; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1640; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001641;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001642; SKX-LABEL: test_pblendd_ymm:
1643; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00001644; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1645; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1646; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1647; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001648;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001649; ZNVER1-LABEL: test_pblendd_ymm:
1650; ZNVER1: # BB#0:
1651; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1652; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50]
1653; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1654; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: in each mask, indices 0..7 pick from the first operand and
; 8..15 from the second, always at the same lane position.
; NOTE(review): the trailing add presumably keeps the blends in the integer
; execution domain so vpblendd is selected - confirm before removing it.
1655 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
1656 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
1657 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
1658 %4 = add <8 x i32> %a0, %3
1659 ret <8 x i32> %4
1660}
1661
; test_pblendvb: calls @llvm.x86.avx2.pblendvb on %a0/%a1 with selector %a2,
; then calls it again on that result and a <32 x i8> loaded from %a3 with
; selector %a4. For each CPU configuration the check lines expect a
; register-only vpblendvb followed by a vpblendvb whose second operand is the
; (%rdi) memory operand, each carrying the "sched" annotation emitted under
; -print-schedule.
1662define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) {
1663; GENERIC-LABEL: test_pblendvb:
1664; GENERIC: # BB#0:
1665; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
1666; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
1667; GENERIC-NEXT: retq # sched: [1:1.00]
1668;
1669; HASWELL-LABEL: test_pblendvb:
1670; HASWELL: # BB#0:
1671; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
1672; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:2.00]
1673; HASWELL-NEXT: retq # sched: [2:1.00]
1674;
Gadi Haber85d99b42017-10-17 13:45:39 +00001675; BROADWELL-LABEL: test_pblendvb:
1676; BROADWELL: # BB#0:
1677; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001678; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
1679; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001680;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001681; SKYLAKE-LABEL: test_pblendvb:
1682; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001683; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001684; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1685; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001686;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001687; SKX-LABEL: test_pblendvb:
1688; SKX: # BB#0:
1689; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
Gadi Haber684944b2017-10-08 12:52:54 +00001690; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1691; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001692;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001693; ZNVER1-LABEL: test_pblendvb:
1694; ZNVER1: # BB#0:
1695; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1696; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
1697; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 blends the two vector arguments under %a2; %3 blends %1
; with the 32-byte-aligned load %2 under %a4.
1698 %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2)
1699 %2 = load <32 x i8>, <32 x i8> *%a3, align 32
1700 %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4)
1701 ret <32 x i8> %3
1702}
; Declaration of the intrinsic called by test_pblendvb.
1703declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
1704
; test_pblendw: schedule-annotation test for the 256-bit vpblendw.
; Two shufflevectors are used: the first mixes %a0 and %a1 (register form),
; the second mixes that result with a vector loaded from %a2 (memory form).
; Each -mcpu prefix checks both vpblendw forms and the final retq.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
1705define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1706; GENERIC-LABEL: test_pblendw:
1707; GENERIC: # BB#0:
1708; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
1709; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [5:0.50]
1710; GENERIC-NEXT: retq # sched: [1:1.00]
1711;
1712; HASWELL-LABEL: test_pblendw:
1713; HASWELL: # BB#0:
1714; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1715; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [4:1.00]
1716; HASWELL-NEXT: retq # sched: [2:1.00]
1717;
Gadi Haber85d99b42017-10-17 13:45:39 +00001718; BROADWELL-LABEL: test_pblendw:
1719; BROADWELL: # BB#0:
1720; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001721; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [7:1.00]
1722; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001723;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001724; SKYLAKE-LABEL: test_pblendw:
1725; SKYLAKE: # BB#0:
1726; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001727; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00]
1728; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001729;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001730; SKX-LABEL: test_pblendw:
1731; SKX: # BB#0:
1732; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001733; SKX-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00]
1734; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001735;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001736; ZNVER1-LABEL: test_pblendw:
1737; ZNVER1: # BB#0:
1738; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33]
1739; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [9:0.50]
1740; ZNVER1-NEXT: retq # sched: [1:0.50]
1741 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 28, i32 13, i32 14, i32 15>
1742 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1743 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
1744 ret <16 x i16> %3
1745}
1746
; test_pbroadcastb: schedule-annotation test for the 128-bit vpbroadcastb.
; The body splats element 0 of %a0 (register form) and element 0 of a vector
; loaded from %a1 (memory form), then adds the two splats.
; In the ZNVER1 output the (%rdi) broadcast is emitted before the register
; broadcast; the other prefixes emit the register form first.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001747define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) {
1748; GENERIC-LABEL: test_pbroadcastb:
1749; GENERIC: # BB#0:
1750; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
1751; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
1752; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1753; GENERIC-NEXT: retq # sched: [1:1.00]
1754;
1755; HASWELL-LABEL: test_pbroadcastb:
1756; HASWELL: # BB#0:
1757; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1758; HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:1.00]
1759; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1760; HASWELL-NEXT: retq # sched: [2:1.00]
1761;
Gadi Haber85d99b42017-10-17 13:45:39 +00001762; BROADWELL-LABEL: test_pbroadcastb:
1763; BROADWELL: # BB#0:
1764; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001765; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001766; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001767; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001768;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001769; SKYLAKE-LABEL: test_pbroadcastb:
1770; SKYLAKE: # BB#0:
1771; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001772; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1773; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1774; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001775;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001776; SKX-LABEL: test_pbroadcastb:
1777; SKX: # BB#0:
1778; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001779; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1780; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1781; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001782;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001783; ZNVER1-LABEL: test_pbroadcastb:
1784; ZNVER1: # BB#0:
1785; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00]
1786; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25]
1787; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1788; ZNVER1-NEXT: retq # sched: [1:0.50]
1789 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
1790 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
1791 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
1792 %4 = add <16 x i8> %1, %3
1793 ret <16 x i8> %4
1794}
1795
; test_pbroadcastb_ymm: schedule-annotation test for the 256-bit vpbroadcastb.
; The body splats element 0 of %a0 (register form) and element 0 of a vector
; loaded from %a1 (memory form), then adds the two splats.
; In the ZNVER1 output the (%rdi) broadcast is emitted before the register
; broadcast; the other prefixes emit the register form first.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
1796define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) {
1797; GENERIC-LABEL: test_pbroadcastb_ymm:
1798; GENERIC: # BB#0:
1799; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
1800; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
1801; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1802; GENERIC-NEXT: retq # sched: [1:1.00]
1803;
1804; HASWELL-LABEL: test_pbroadcastb_ymm:
1805; HASWELL: # BB#0:
1806; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1807; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:1.00]
1808; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1809; HASWELL-NEXT: retq # sched: [2:1.00]
1810;
Gadi Haber85d99b42017-10-17 13:45:39 +00001811; BROADWELL-LABEL: test_pbroadcastb_ymm:
1812; BROADWELL: # BB#0:
1813; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001814; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001815; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001816; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001817;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001818; SKYLAKE-LABEL: test_pbroadcastb_ymm:
1819; SKYLAKE: # BB#0:
1820; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001821; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1822; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1823; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001824;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001825; SKX-LABEL: test_pbroadcastb_ymm:
1826; SKX: # BB#0:
1827; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001828; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1829; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1830; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001831;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001832; ZNVER1-LABEL: test_pbroadcastb_ymm:
1833; ZNVER1: # BB#0:
1834; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00]
1835; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25]
1836; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1837; ZNVER1-NEXT: retq # sched: [1:0.50]
1838 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer
1839 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
1840 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer
1841 %4 = add <32 x i8> %1, %3
1842 ret <32 x i8> %4
1843}
1844
; test_pbroadcastd: schedule-annotation test for the 128-bit vpbroadcastd.
; The body splats element 0 of %a0 (register form) and element 0 of a vector
; loaded from %a1 (memory form), then adds the two splats.
; The SKX output has no standalone memory vpbroadcastd: the load-and-splat is
; checked as a (%rdi){1to4} operand of vpaddd instead.
; In the ZNVER1 output the (%rdi) broadcast is emitted before the register one.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
1845define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) {
1846; GENERIC-LABEL: test_pbroadcastd:
1847; GENERIC: # BB#0:
1848; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1849; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
1850; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1851; GENERIC-NEXT: retq # sched: [1:1.00]
1852;
1853; HASWELL-LABEL: test_pbroadcastd:
1854; HASWELL: # BB#0:
1855; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1856; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [1:0.50]
1857; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1858; HASWELL-NEXT: retq # sched: [2:1.00]
1859;
Gadi Haber85d99b42017-10-17 13:45:39 +00001860; BROADWELL-LABEL: test_pbroadcastd:
1861; BROADWELL: # BB#0:
1862; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001863; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001864; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001865; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001866;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001867; SKYLAKE-LABEL: test_pbroadcastd:
1868; SKYLAKE: # BB#0:
1869; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001870; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
1871; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1872; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001873;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001874; SKX-LABEL: test_pbroadcastd:
1875; SKX: # BB#0:
1876; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001877; SKX-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
1878; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001879;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001880; ZNVER1-LABEL: test_pbroadcastd:
1881; ZNVER1: # BB#0:
1882; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50]
1883; ZNVER1-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.25]
1884; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1885; ZNVER1-NEXT: retq # sched: [1:0.50]
1886 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
1887 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
1888 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
1889 %4 = add <4 x i32> %1, %3
1890 ret <4 x i32> %4
1891}
1892
; test_pbroadcastd_ymm: schedule-annotation test for the 256-bit vpbroadcastd.
; The body splats element 0 of %a0 (register form) and element 0 of a vector
; loaded from %a1 (memory form), then adds the two splats.
; The SKX output has no standalone memory vpbroadcastd: the load-and-splat is
; checked as a (%rdi){1to8} operand of vpaddd instead.
; In the ZNVER1 output the (%rdi) broadcast is emitted before the register one.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
1893define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) {
1894; GENERIC-LABEL: test_pbroadcastd_ymm:
1895; GENERIC: # BB#0:
1896; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
1897; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
1898; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1899; GENERIC-NEXT: retq # sched: [1:1.00]
1900;
1901; HASWELL-LABEL: test_pbroadcastd_ymm:
1902; HASWELL: # BB#0:
1903; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1904; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [1:0.50]
1905; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1906; HASWELL-NEXT: retq # sched: [2:1.00]
1907;
Gadi Haber85d99b42017-10-17 13:45:39 +00001908; BROADWELL-LABEL: test_pbroadcastd_ymm:
1909; BROADWELL: # BB#0:
1910; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001911; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001912; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001913; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001914;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001915; SKYLAKE-LABEL: test_pbroadcastd_ymm:
1916; SKYLAKE: # BB#0:
1917; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001918; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
1919; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1920; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001921;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001922; SKX-LABEL: test_pbroadcastd_ymm:
1923; SKX: # BB#0:
1924; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001925; SKX-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
1926; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001927;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001928; ZNVER1-LABEL: test_pbroadcastd_ymm:
1929; ZNVER1: # BB#0:
1930; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50]
1931; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25]
1932; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1933; ZNVER1-NEXT: retq # sched: [1:0.50]
1934 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
1935 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
1936 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer
1937 %4 = add <8 x i32> %1, %3
1938 ret <8 x i32> %4
1939}
1940
; test_pbroadcastq: schedule-annotation test for the 128-bit vpbroadcastq.
; The body splats element 0 of %a0 (register form) and element 0 of a vector
; loaded from %a1 (memory form), then adds the two splats.
; The SKX output has no standalone memory vpbroadcastq: the load-and-splat is
; checked as a (%rdi){1to2} operand of vpaddq instead.
; In the ZNVER1 output the (%rdi) broadcast is emitted before the register one.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
1941define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) {
1942; GENERIC-LABEL: test_pbroadcastq:
1943; GENERIC: # BB#0:
1944; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1945; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
1946; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1947; GENERIC-NEXT: retq # sched: [1:1.00]
1948;
1949; HASWELL-LABEL: test_pbroadcastq:
1950; HASWELL: # BB#0:
1951; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1952; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [1:0.50]
1953; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1954; HASWELL-NEXT: retq # sched: [2:1.00]
1955;
Gadi Haber85d99b42017-10-17 13:45:39 +00001956; BROADWELL-LABEL: test_pbroadcastq:
1957; BROADWELL: # BB#0:
1958; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001959; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001960; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001961; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001962;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001963; SKYLAKE-LABEL: test_pbroadcastq:
1964; SKYLAKE: # BB#0:
1965; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001966; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
1967; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1968; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001969;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001970; SKX-LABEL: test_pbroadcastq:
1971; SKX: # BB#0:
1972; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001973; SKX-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0 # sched: [7:0.50]
1974; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001975;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001976; ZNVER1-LABEL: test_pbroadcastq:
1977; ZNVER1: # BB#0:
1978; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50]
1979; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25]
1980; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1981; ZNVER1-NEXT: retq # sched: [1:0.50]
1982 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
1983 %2 = load <2 x i64>, <2 x i64> *%a1, align 16
1984 %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
1985 %4 = add <2 x i64> %1, %3
1986 ret <2 x i64> %4
1987}
1988
; test_pbroadcastq_ymm: schedule-annotation test for the 256-bit vpbroadcastq.
; The body splats element 0 of %a0 (register form) and element 0 of a vector
; loaded from %a1 (memory form), then adds the two splats.
; The SKX output has no standalone memory vpbroadcastq: the load-and-splat is
; checked as a (%rdi){1to4} operand of vpaddq instead.
; In the ZNVER1 output the (%rdi) broadcast is emitted before the register one.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
1989define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) {
1990; GENERIC-LABEL: test_pbroadcastq_ymm:
1991; GENERIC: # BB#0:
1992; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
1993; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
1994; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1995; GENERIC-NEXT: retq # sched: [1:1.00]
1996;
1997; HASWELL-LABEL: test_pbroadcastq_ymm:
1998; HASWELL: # BB#0:
1999; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
2000; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [1:0.50]
2001; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2002; HASWELL-NEXT: retq # sched: [2:1.00]
2003;
Gadi Haber85d99b42017-10-17 13:45:39 +00002004; BROADWELL-LABEL: test_pbroadcastq_ymm:
2005; BROADWELL: # BB#0:
2006; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002007; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00002008; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002009; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002010;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002011; SKYLAKE-LABEL: test_pbroadcastq_ymm:
2012; SKYLAKE: # BB#0:
2013; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002014; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
2015; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2016; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002017;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002018; SKX-LABEL: test_pbroadcastq_ymm:
2019; SKX: # BB#0:
2020; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002021; SKX-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0 # sched: [8:0.50]
2022; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002023;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002024; ZNVER1-LABEL: test_pbroadcastq_ymm:
2025; ZNVER1: # BB#0:
2026; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50]
2027; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25]
2028; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2029; ZNVER1-NEXT: retq # sched: [1:0.50]
2030 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
2031 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2032 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer
2033 %4 = add <4 x i64> %1, %3
2034 ret <4 x i64> %4
2035}
2036
; test_pbroadcastw: schedule-annotation test for the 128-bit vpbroadcastw.
; The body splats element 0 of %a0 (register form) and element 0 of a vector
; loaded from %a1 (memory form), then adds the two splats.
; In the ZNVER1 output the (%rdi) broadcast is emitted before the register
; broadcast; the other prefixes emit the register form first.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
2037define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) {
2038; GENERIC-LABEL: test_pbroadcastw:
2039; GENERIC: # BB#0:
2040; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
2041; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
2042; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2043; GENERIC-NEXT: retq # sched: [1:1.00]
2044;
2045; HASWELL-LABEL: test_pbroadcastw:
2046; HASWELL: # BB#0:
2047; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
2048; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:1.00]
2049; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2050; HASWELL-NEXT: retq # sched: [2:1.00]
2051;
Gadi Haber85d99b42017-10-17 13:45:39 +00002052; BROADWELL-LABEL: test_pbroadcastw:
2053; BROADWELL: # BB#0:
2054; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002055; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002056; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002057; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002058;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002059; SKYLAKE-LABEL: test_pbroadcastw:
2060; SKYLAKE: # BB#0:
2061; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002062; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2063; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2064; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002065;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002066; SKX-LABEL: test_pbroadcastw:
2067; SKX: # BB#0:
2068; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002069; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2070; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2071; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002072;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002073; ZNVER1-LABEL: test_pbroadcastw:
2074; ZNVER1: # BB#0:
2075; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00]
2076; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25]
2077; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2078; ZNVER1-NEXT: retq # sched: [1:0.50]
2079 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
2080 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
2081 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
2082 %4 = add <8 x i16> %1, %3
2083 ret <8 x i16> %4
2084}
2085
; test_pbroadcastw_ymm: schedule-annotation test for the 256-bit vpbroadcastw.
; The body splats element 0 of %a0 (register form) and element 0 of a vector
; loaded from %a1 (memory form), then adds the two splats.
; In the ZNVER1 output the (%rdi) broadcast is emitted before the register
; broadcast; the other prefixes emit the register form first.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
2086define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) {
2087; GENERIC-LABEL: test_pbroadcastw_ymm:
2088; GENERIC: # BB#0:
2089; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
2090; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
2091; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2092; GENERIC-NEXT: retq # sched: [1:1.00]
2093;
2094; HASWELL-LABEL: test_pbroadcastw_ymm:
2095; HASWELL: # BB#0:
2096; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
2097; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:1.00]
2098; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2099; HASWELL-NEXT: retq # sched: [2:1.00]
2100;
Gadi Haber85d99b42017-10-17 13:45:39 +00002101; BROADWELL-LABEL: test_pbroadcastw_ymm:
2102; BROADWELL: # BB#0:
2103; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002104; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002105; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002106; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002107;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002108; SKYLAKE-LABEL: test_pbroadcastw_ymm:
2109; SKYLAKE: # BB#0:
2110; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002111; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2112; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2113; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002114;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002115; SKX-LABEL: test_pbroadcastw_ymm:
2116; SKX: # BB#0:
2117; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002118; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2119; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2120; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002121;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002122; ZNVER1-LABEL: test_pbroadcastw_ymm:
2123; ZNVER1: # BB#0:
2124; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00]
2125; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25]
2126; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2127; ZNVER1-NEXT: retq # sched: [1:0.50]
2128 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
2129 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
2130 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer
2131 %4 = add <16 x i16> %1, %3
2132 ret <16 x i16> %4
2133}
2134
; test_pcmpeqb: schedule-annotation test for the 256-bit vpcmpeqb.
; The body does icmp eq + sext on %a0/%a1 (register form), then icmp eq + sext
; of that result against a vector loaded from %a2 (memory form).
; The SKX output differs from the other prefixes: each compare writes mask
; register %k0 and is followed by vpmovm2b, and the vpmovm2b lines are checked
; without any "# sched:" annotation.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002135define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2136; GENERIC-LABEL: test_pcmpeqb:
2137; GENERIC: # BB#0:
2138; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2139; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2140; GENERIC-NEXT: retq # sched: [1:1.00]
2141;
2142; HASWELL-LABEL: test_pcmpeqb:
2143; HASWELL: # BB#0:
2144; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2145; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2146; HASWELL-NEXT: retq # sched: [2:1.00]
2147;
Gadi Haber85d99b42017-10-17 13:45:39 +00002148; BROADWELL-LABEL: test_pcmpeqb:
2149; BROADWELL: # BB#0:
2150; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002151; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2152; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002153;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002154; SKYLAKE-LABEL: test_pcmpeqb:
2155; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002156; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2157; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2158; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002159;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002160; SKX-LABEL: test_pcmpeqb:
2161; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002162; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002163; SKX-NEXT: vpmovm2b %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002164; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002165; SKX-NEXT: vpmovm2b %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002166; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002167;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002168; ZNVER1-LABEL: test_pcmpeqb:
2169; ZNVER1: # BB#0:
2170; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2171; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2172; ZNVER1-NEXT: retq # sched: [1:0.50]
2173 %1 = icmp eq <32 x i8> %a0, %a1
2174 %2 = sext <32 x i1> %1 to <32 x i8>
2175 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
2176 %4 = icmp eq <32 x i8> %2, %3
2177 %5 = sext <32 x i1> %4 to <32 x i8>
2178 ret <32 x i8> %5
2179}
2180
; test_pcmpeqd: schedule-annotation test for the 256-bit vpcmpeqd.
; The body does icmp eq + sext on %a0/%a1 (register form), then icmp eq + sext
; of that result against a vector loaded from %a2 (memory form).
; The SKX output differs from the other prefixes: each compare writes mask
; register %k0 and is followed by vpmovm2d, and the vpmovm2d lines are checked
; without any "# sched:" annotation.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
2181define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2182; GENERIC-LABEL: test_pcmpeqd:
2183; GENERIC: # BB#0:
2184; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2185; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2186; GENERIC-NEXT: retq # sched: [1:1.00]
2187;
2188; HASWELL-LABEL: test_pcmpeqd:
2189; HASWELL: # BB#0:
2190; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2191; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2192; HASWELL-NEXT: retq # sched: [2:1.00]
2193;
Gadi Haber85d99b42017-10-17 13:45:39 +00002194; BROADWELL-LABEL: test_pcmpeqd:
2195; BROADWELL: # BB#0:
2196; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002197; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2198; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002199;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002200; SKYLAKE-LABEL: test_pcmpeqd:
2201; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002202; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2203; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2204; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002205;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002206; SKX-LABEL: test_pcmpeqd:
2207; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002208; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002209; SKX-NEXT: vpmovm2d %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002210; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002211; SKX-NEXT: vpmovm2d %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002212; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002213;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002214; ZNVER1-LABEL: test_pcmpeqd:
2215; ZNVER1: # BB#0:
2216; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2217; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2218; ZNVER1-NEXT: retq # sched: [1:0.50]
2219 %1 = icmp eq <8 x i32> %a0, %a1
2220 %2 = sext <8 x i1> %1 to <8 x i32>
2221 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
2222 %4 = icmp eq <8 x i32> %2, %3
2223 %5 = sext <8 x i1> %4 to <8 x i32>
2224 ret <8 x i32> %5
2225}
2226
; test_pcmpeqq: schedule-annotation test for the 256-bit vpcmpeqq.
; The body does icmp eq + sext on %a0/%a1 (register form), then icmp eq + sext
; of that result against a vector loaded from %a2 (memory form).
; The SKX output differs from the other prefixes: each compare writes mask
; register %k0 and is followed by vpmovm2q, and the vpmovm2q lines are checked
; without any "# sched:" annotation.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
2227define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2228; GENERIC-LABEL: test_pcmpeqq:
2229; GENERIC: # BB#0:
2230; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2231; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2232; GENERIC-NEXT: retq # sched: [1:1.00]
2233;
2234; HASWELL-LABEL: test_pcmpeqq:
2235; HASWELL: # BB#0:
2236; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2237; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2238; HASWELL-NEXT: retq # sched: [2:1.00]
2239;
Gadi Haber85d99b42017-10-17 13:45:39 +00002240; BROADWELL-LABEL: test_pcmpeqq:
2241; BROADWELL: # BB#0:
2242; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002243; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2244; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002245;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002246; SKYLAKE-LABEL: test_pcmpeqq:
2247; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002248; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2249; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2250; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002251;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002252; SKX-LABEL: test_pcmpeqq:
2253; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002254; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002255; SKX-NEXT: vpmovm2q %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002256; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002257; SKX-NEXT: vpmovm2q %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002258; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002259;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002260; ZNVER1-LABEL: test_pcmpeqq:
2261; ZNVER1: # BB#0:
2262; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2263; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2264; ZNVER1-NEXT: retq # sched: [1:0.50]
2265 %1 = icmp eq <4 x i64> %a0, %a1
2266 %2 = sext <4 x i1> %1 to <4 x i64>
2267 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
2268 %4 = icmp eq <4 x i64> %2, %3
2269 %5 = sext <4 x i1> %4 to <4 x i64>
2270 ret <4 x i64> %5
2271}
2272
; Scheduling test for vpcmpeqw on 256-bit vectors (<16 x i16> equality compare).
; The IR performs one compare between the two register arguments and a second
; compare between the sign-extended first result and a vector loaded from %a2,
; which yields one register-register and one load-folded vpcmpeqw in the
; expected output.
; The skx expectations produce the result in %k0 and expand it with vpmovm2w;
; the vpmovm2w lines have no sched annotation.
; NOTE(review): on haswell the load-folded form is expected at [1:0.50], the
; same as the register form -- confirm whether load latency is missing from the
; haswell model.
2273define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2274; GENERIC-LABEL: test_pcmpeqw:
2275; GENERIC: # BB#0:
2276; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2277; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2278; GENERIC-NEXT: retq # sched: [1:1.00]
2279;
2280; HASWELL-LABEL: test_pcmpeqw:
2281; HASWELL: # BB#0:
2282; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2283; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2284; HASWELL-NEXT: retq # sched: [2:1.00]
2285;
Gadi Haber85d99b42017-10-17 13:45:39 +00002286; BROADWELL-LABEL: test_pcmpeqw:
2287; BROADWELL: # BB#0:
2288; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002289; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2290; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002291;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002292; SKYLAKE-LABEL: test_pcmpeqw:
2293; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002294; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2295; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2296; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002297;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002298; SKX-LABEL: test_pcmpeqw:
2299; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002300; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002301; SKX-NEXT: vpmovm2w %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002302; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002303; SKX-NEXT: vpmovm2w %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002304; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002305;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002306; ZNVER1-LABEL: test_pcmpeqw:
2307; ZNVER1: # BB#0:
2308; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2309; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2310; ZNVER1-NEXT: retq # sched: [1:0.50]
2311 %1 = icmp eq <16 x i16> %a0, %a1
2312 %2 = sext <16 x i1> %1 to <16 x i16>
2313 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2314 %4 = icmp eq <16 x i16> %2, %3
2315 %5 = sext <16 x i1> %4 to <16 x i16>
2316 ret <16 x i16> %5
2317}
2318
; Scheduling test for vpcmpgtb on 256-bit vectors (<32 x i8> signed
; greater-than compare). The IR does `icmp sgt` on the two register arguments,
; sign-extends the result, and does a second `icmp sgt` against a vector loaded
; from %a2, so both the register-register and the load-folded forms are
; expected.
; The skx expectations route the result through %k0 and vpmovm2b; the vpmovm2b
; lines have no sched annotation.
; NOTE(review): on haswell the load-folded form is expected at [1:0.50], the
; same as the register form -- confirm whether load latency is missing from the
; haswell model.
2319define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2320; GENERIC-LABEL: test_pcmpgtb:
2321; GENERIC: # BB#0:
2322; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2323; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2324; GENERIC-NEXT: retq # sched: [1:1.00]
2325;
2326; HASWELL-LABEL: test_pcmpgtb:
2327; HASWELL: # BB#0:
2328; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2329; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2330; HASWELL-NEXT: retq # sched: [2:1.00]
2331;
Gadi Haber85d99b42017-10-17 13:45:39 +00002332; BROADWELL-LABEL: test_pcmpgtb:
2333; BROADWELL: # BB#0:
2334; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002335; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2336; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002337;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002338; SKYLAKE-LABEL: test_pcmpgtb:
2339; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002340; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2341; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2342; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002343;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002344; SKX-LABEL: test_pcmpgtb:
2345; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002346; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002347; SKX-NEXT: vpmovm2b %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002348; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002349; SKX-NEXT: vpmovm2b %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002350; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002351;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002352; ZNVER1-LABEL: test_pcmpgtb:
2353; ZNVER1: # BB#0:
2354; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2355; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2356; ZNVER1-NEXT: retq # sched: [1:0.50]
2357 %1 = icmp sgt <32 x i8> %a0, %a1
2358 %2 = sext <32 x i1> %1 to <32 x i8>
2359 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
2360 %4 = icmp sgt <32 x i8> %2, %3
2361 %5 = sext <32 x i1> %4 to <32 x i8>
2362 ret <32 x i8> %5
2363}
2364
; Scheduling test for vpcmpgtd on 256-bit vectors (<8 x i32> signed
; greater-than compare). The IR does `icmp sgt` on the two register arguments,
; sign-extends the result, and does a second `icmp sgt` against a vector loaded
; from %a2, so both the register-register and the load-folded forms are
; expected.
; The skx expectations route the result through %k0 and vpmovm2d; the vpmovm2d
; lines have no sched annotation.
; NOTE(review): on haswell the load-folded form is expected at [1:0.50], the
; same as the register form -- confirm whether load latency is missing from the
; haswell model.
2365define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2366; GENERIC-LABEL: test_pcmpgtd:
2367; GENERIC: # BB#0:
2368; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2369; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2370; GENERIC-NEXT: retq # sched: [1:1.00]
2371;
2372; HASWELL-LABEL: test_pcmpgtd:
2373; HASWELL: # BB#0:
2374; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2375; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2376; HASWELL-NEXT: retq # sched: [2:1.00]
2377;
Gadi Haber85d99b42017-10-17 13:45:39 +00002378; BROADWELL-LABEL: test_pcmpgtd:
2379; BROADWELL: # BB#0:
2380; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002381; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2382; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002383;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002384; SKYLAKE-LABEL: test_pcmpgtd:
2385; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002386; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2387; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2388; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002389;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002390; SKX-LABEL: test_pcmpgtd:
2391; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002392; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002393; SKX-NEXT: vpmovm2d %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002394; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002395; SKX-NEXT: vpmovm2d %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002396; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002397;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002398; ZNVER1-LABEL: test_pcmpgtd:
2399; ZNVER1: # BB#0:
2400; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2401; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2402; ZNVER1-NEXT: retq # sched: [1:0.50]
2403 %1 = icmp sgt <8 x i32> %a0, %a1
2404 %2 = sext <8 x i1> %1 to <8 x i32>
2405 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
2406 %4 = icmp sgt <8 x i32> %2, %3
2407 %5 = sext <8 x i1> %4 to <8 x i32>
2408 ret <8 x i32> %5
2409}
2410
; Scheduling test for vpcmpgtq on 256-bit vectors (<4 x i64> signed
; greater-than compare). The IR does `icmp sgt` on the two register arguments,
; sign-extends the result, and does a second `icmp sgt` against a vector loaded
; from %a2, so both the register-register and the load-folded forms are
; expected.
; The skx expectations route the result through %k0 and vpmovm2q; the vpmovm2q
; lines have no sched annotation.
; NOTE(review): on haswell both the register form and the load-folded form are
; expected at [5:1.00] -- confirm whether load latency is missing from the
; haswell model (broadwell shows 5 vs 11 for the same pair).
2411define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2412; GENERIC-LABEL: test_pcmpgtq:
2413; GENERIC: # BB#0:
2414; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2415; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2416; GENERIC-NEXT: retq # sched: [1:1.00]
2417;
2418; HASWELL-LABEL: test_pcmpgtq:
2419; HASWELL: # BB#0:
2420; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2421; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2422; HASWELL-NEXT: retq # sched: [2:1.00]
2423;
Gadi Haber85d99b42017-10-17 13:45:39 +00002424; BROADWELL-LABEL: test_pcmpgtq:
2425; BROADWELL: # BB#0:
2426; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002427; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
2428; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002429;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002430; SKYLAKE-LABEL: test_pcmpgtq:
2431; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002432; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002433; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2434; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002435;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002436; SKX-LABEL: test_pcmpgtq:
2437; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002438; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002439; SKX-NEXT: vpmovm2q %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002440; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002441; SKX-NEXT: vpmovm2q %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002442; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002443;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002444; ZNVER1-LABEL: test_pcmpgtq:
2445; ZNVER1: # BB#0:
2446; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2447; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
2448; ZNVER1-NEXT: retq # sched: [1:0.50]
2449 %1 = icmp sgt <4 x i64> %a0, %a1
2450 %2 = sext <4 x i1> %1 to <4 x i64>
2451 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
2452 %4 = icmp sgt <4 x i64> %2, %3
2453 %5 = sext <4 x i1> %4 to <4 x i64>
2454 ret <4 x i64> %5
2455}
2456
; Scheduling test for vpcmpgtw on 256-bit vectors (<16 x i16> signed
; greater-than compare). The IR does `icmp sgt` on the two register arguments,
; sign-extends the result, and does a second `icmp sgt` against a vector loaded
; from %a2, so both the register-register and the load-folded forms are
; expected.
; The skx expectations route the result through %k0 and vpmovm2w; the vpmovm2w
; lines have no sched annotation.
; NOTE(review): on haswell the load-folded form is expected at [1:0.50], the
; same as the register form -- confirm whether load latency is missing from the
; haswell model.
2457define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2458; GENERIC-LABEL: test_pcmpgtw:
2459; GENERIC: # BB#0:
2460; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2461; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2462; GENERIC-NEXT: retq # sched: [1:1.00]
2463;
2464; HASWELL-LABEL: test_pcmpgtw:
2465; HASWELL: # BB#0:
2466; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2467; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2468; HASWELL-NEXT: retq # sched: [2:1.00]
2469;
Gadi Haber85d99b42017-10-17 13:45:39 +00002470; BROADWELL-LABEL: test_pcmpgtw:
2471; BROADWELL: # BB#0:
2472; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002473; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2474; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002475;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002476; SKYLAKE-LABEL: test_pcmpgtw:
2477; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002478; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2479; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2480; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002481;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002482; SKX-LABEL: test_pcmpgtw:
2483; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002484; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # sched: [3:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002485; SKX-NEXT: vpmovm2w %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002486; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 # sched: [10:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002487; SKX-NEXT: vpmovm2w %k0, %ymm0
Gadi Haber684944b2017-10-08 12:52:54 +00002488; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002489;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002490; ZNVER1-LABEL: test_pcmpgtw:
2491; ZNVER1: # BB#0:
2492; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2493; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2494; ZNVER1-NEXT: retq # sched: [1:0.50]
2495 %1 = icmp sgt <16 x i16> %a0, %a1
2496 %2 = sext <16 x i1> %1 to <16 x i16>
2497 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2498 %4 = icmp sgt <16 x i16> %2, %3
2499 %5 = sext <16 x i1> %4 to <16 x i16>
2500 ret <16 x i16> %5
2501}
2502
; Scheduling test for vperm2i128. Two shufflevector operations with the mask
; <2, 3, 4, 5> are used: one on (%a0, %a1) and one on (%a0, load of %a2). The
; expected output shows them as vperm2i128 in register form
; (ymm0[2,3],ymm1[0,1]) and load-folded form (ymm0[2,3],mem[0,1]). The two
; shuffle results are summed with a vector add (vpaddq in the output) and the
; sum is returned.
; NOTE(review): on haswell the register and load-folded forms are both expected
; at [3:1.00] -- confirm whether load latency is missing from the haswell
; model.
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002503define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2504; GENERIC-LABEL: test_perm2i128:
2505; GENERIC: # BB#0:
2506; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
2507; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00]
2508; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2509; GENERIC-NEXT: retq # sched: [1:1.00]
2510;
2511; HASWELL-LABEL: test_perm2i128:
2512; HASWELL: # BB#0:
2513; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2514; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00]
2515; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2516; HASWELL-NEXT: retq # sched: [2:1.00]
2517;
Gadi Haber85d99b42017-10-17 13:45:39 +00002518; BROADWELL-LABEL: test_perm2i128:
2519; BROADWELL: # BB#0:
2520; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002521; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002522; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002523; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002524;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002525; SKYLAKE-LABEL: test_perm2i128:
2526; SKYLAKE: # BB#0:
2527; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002528; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2529; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2530; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002531;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002532; SKX-LABEL: test_perm2i128:
2533; SKX: # BB#0:
2534; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002535; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2536; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2537; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002538;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002539; ZNVER1-LABEL: test_perm2i128:
2540; ZNVER1: # BB#0:
2541; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25]
2542; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50]
2543; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2544; ZNVER1-NEXT: retq # sched: [1:0.50]
2545 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2546 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
2547 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2548 %4 = add <4 x i64> %1, %3
2549 ret <4 x i64> %4
2550}
2551
; Scheduling test for vpermd, driven through the @llvm.x86.avx2.permd intrinsic
; (declared right after the function). The intrinsic is called twice with %a0
; as its second argument: once with %a1 and once with a vector loaded from %a2
; as the first argument. The expected output shows a register form and a
; load-folded (%rdi) form; the two results are added (vpaddd) and returned.
; NOTE(review): on haswell the register and load-folded forms are both expected
; at [3:1.00] -- confirm whether load latency is missing from the haswell
; model.
2552define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2553; GENERIC-LABEL: test_permd:
2554; GENERIC: # BB#0:
2555; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2556; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2557; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2558; GENERIC-NEXT: retq # sched: [1:1.00]
2559;
2560; HASWELL-LABEL: test_permd:
2561; HASWELL: # BB#0:
2562; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2563; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2564; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2565; HASWELL-NEXT: retq # sched: [2:1.00]
2566;
Gadi Haber85d99b42017-10-17 13:45:39 +00002567; BROADWELL-LABEL: test_permd:
2568; BROADWELL: # BB#0:
2569; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002570; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002571; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002572; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002573;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002574; SKYLAKE-LABEL: test_permd:
2575; SKYLAKE: # BB#0:
2576; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002577; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2578; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2579; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002580;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002581; SKX-LABEL: test_permd:
2582; SKX: # BB#0:
2583; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002584; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2585; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2586; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002587;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002588; ZNVER1-LABEL: test_permd:
2589; ZNVER1: # BB#0:
2590; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25]
2591; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
2592; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2593; ZNVER1-NEXT: retq # sched: [1:0.50]
2594 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
2595 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
2596 %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0)
2597 %4 = add <8 x i32> %1, %3
2598 ret <8 x i32> %4
2599}
2600declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
2601
; Scheduling test for vpermpd with immediate shuffle masks. The register
; argument %a0 is shuffled with mask <3, 2, 2, 3> and a vector loaded from %a1
; is shuffled with mask <0, 2, 2, 3>; the expected output shows these as
; ymm0[3,2,2,3] and mem[0,2,2,3]. The two results are combined with fadd
; (vaddpd) and returned.
; In the znver1 expectations the load-folded vpermpd is emitted before the
; register form, the reverse of the other CPU prefixes.
; NOTE(review): the znver1 latencies of 100 (register) and 107 (load-folded)
; look like placeholder values in the scheduling model -- confirm.
; NOTE(review): on haswell the register and load-folded forms are both expected
; at [3:1.00] -- confirm whether load latency is missing from the haswell
; model.
2602define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
2603; GENERIC-LABEL: test_permpd:
2604; GENERIC: # BB#0:
2605; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2606; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2607; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2608; GENERIC-NEXT: retq # sched: [1:1.00]
2609;
2610; HASWELL-LABEL: test_permpd:
2611; HASWELL: # BB#0:
2612; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2613; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00]
2614; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2615; HASWELL-NEXT: retq # sched: [2:1.00]
2616;
Gadi Haber85d99b42017-10-17 13:45:39 +00002617; BROADWELL-LABEL: test_permpd:
2618; BROADWELL: # BB#0:
2619; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002620; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002621; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002622; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002623;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002624; SKYLAKE-LABEL: test_permpd:
2625; SKYLAKE: # BB#0:
2626; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002627; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002628; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002629; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002630;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002631; SKX-LABEL: test_permpd:
2632; SKX: # BB#0:
2633; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002634; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2635; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2636; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002637;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002638; ZNVER1-LABEL: test_permpd:
2639; ZNVER1: # BB#0:
2640; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50]
2641; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25]
2642; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2643; ZNVER1-NEXT: retq # sched: [1:0.50]
2644 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
2645 %2 = load <4 x double>, <4 x double> *%a1, align 32
2646 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2647 %4 = fadd <4 x double> %1, %3
2648 ret <4 x double> %4
2649}
2650
; Scheduling test for vpermps, driven through the @llvm.x86.avx2.permps
; intrinsic (declared right after the function). The intrinsic is called twice
; with the <8 x i32> argument %a0 as its second operand: once with %a1 and once
; with a vector loaded from %a2 as the first operand. The expected output shows
; a register form and a load-folded (%rdi) form; the two results are combined
; with fadd (vaddps) and returned.
; NOTE(review): the znver1 latencies of 100 (register) and 107 (load-folded)
; look like placeholder values in the scheduling model -- confirm.
; NOTE(review): on haswell the register and load-folded forms are both expected
; at [3:1.00] -- confirm whether load latency is missing from the haswell
; model.
2651define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) {
2652; GENERIC-LABEL: test_permps:
2653; GENERIC: # BB#0:
2654; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2655; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2656; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2657; GENERIC-NEXT: retq # sched: [1:1.00]
2658;
2659; HASWELL-LABEL: test_permps:
2660; HASWELL: # BB#0:
2661; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2662; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2663; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2664; HASWELL-NEXT: retq # sched: [2:1.00]
2665;
Gadi Haber85d99b42017-10-17 13:45:39 +00002666; BROADWELL-LABEL: test_permps:
2667; BROADWELL: # BB#0:
2668; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002669; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002670; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002671; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002672;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002673; SKYLAKE-LABEL: test_permps:
2674; SKYLAKE: # BB#0:
2675; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002676; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002677; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002678; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002679;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002680; SKX-LABEL: test_permps:
2681; SKX: # BB#0:
2682; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002683; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2684; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
2685; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002686;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002687; ZNVER1-LABEL: test_permps:
2688; ZNVER1: # BB#0:
2689; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25]
2690; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50]
2691; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2692; ZNVER1-NEXT: retq # sched: [1:0.50]
2693 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
2694 %2 = load <8 x float>, <8 x float> *%a2, align 32
2695 %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0)
2696 %4 = fadd <8 x float> %1, %3
2697 ret <8 x float> %4
2698}
2699declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
2700
; Scheduling test for vpermq with immediate shuffle masks. The register
; argument %a0 is shuffled with mask <3, 2, 2, 3> and a vector loaded from %a1
; is shuffled with mask <0, 2, 2, 3>; the expected output shows these as
; ymm0[3,2,2,3] and mem[0,2,2,3]. The two results are added (vpaddq) and
; returned.
; In the znver1 expectations the load-folded vpermq is emitted before the
; register form, the reverse of the other CPU prefixes.
; NOTE(review): on haswell the register and load-folded forms are both expected
; at [3:1.00] -- confirm whether load latency is missing from the haswell
; model.
2701define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
2702; GENERIC-LABEL: test_permq:
2703; GENERIC: # BB#0:
2704; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2705; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2706; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2707; GENERIC-NEXT: retq # sched: [1:1.00]
2708;
2709; HASWELL-LABEL: test_permq:
2710; HASWELL: # BB#0:
2711; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2712; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00]
2713; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2714; HASWELL-NEXT: retq # sched: [2:1.00]
2715;
Gadi Haber85d99b42017-10-17 13:45:39 +00002716; BROADWELL-LABEL: test_permq:
2717; BROADWELL: # BB#0:
2718; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002719; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002720; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002721; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002722;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002723; SKYLAKE-LABEL: test_permq:
2724; SKYLAKE: # BB#0:
2725; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002726; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2727; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2728; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002729;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002730; SKX-LABEL: test_permq:
2731; SKX: # BB#0:
2732; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002733; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2734; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2735; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002736;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002737; ZNVER1-LABEL: test_permq:
2738; ZNVER1: # BB#0:
2739; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50]
2740; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25]
2741; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2742; ZNVER1-NEXT: retq # sched: [1:0.50]
2743 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
2744 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2745 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2746 %4 = add <4 x i64> %1, %3
2747 ret <4 x i64> %4
2748}
2749
; Scheduling test for the 128-bit vpgatherdd, driven through the
; @llvm.x86.avx2.gather.d.d intrinsic (declared right after the function). The
; function forwards its four arguments unchanged and passes the constant i8 2
; as the last operand, which shows up as the scale in the expected addressing
; mode (%rdi,%xmm1,2).
; The generic and broadwell expectations carry no sched annotation on the
; gather; haswell and znver1 print `?` in the throughput slot.
; NOTE(review): the missing or `?` values presumably mean those scheduling
; models have no complete entry for this gather -- confirm.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002750define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
2751; GENERIC-LABEL: test_pgatherdd:
2752; GENERIC: # BB#0:
2753; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
2754; GENERIC-NEXT: retq # sched: [1:1.00]
2755;
2756; HASWELL-LABEL: test_pgatherdd:
2757; HASWELL: # BB#0:
2758; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2759; HASWELL-NEXT: retq # sched: [2:1.00]
2760;
Gadi Haber85d99b42017-10-17 13:45:39 +00002761; BROADWELL-LABEL: test_pgatherdd:
2762; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002763; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
2764; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002765;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002766; SKYLAKE-LABEL: test_pgatherdd:
2767; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002768; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2769; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002770;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002771; SKX-LABEL: test_pgatherdd:
2772; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002773; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2774; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002775;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002776; ZNVER1-LABEL: test_pgatherdd:
2777; ZNVER1: # BB#0:
2778; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2779; ZNVER1-NEXT: retq # sched: [1:0.50]
2780 %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2)
2781 ret <4 x i32> %1
2782}
2783declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly
2784
; Scheduling test for the 256-bit vpgatherdd, driven through the
; @llvm.x86.avx2.gather.d.d.256 intrinsic (declared right after the function).
; The function forwards its four arguments unchanged and passes the constant
; i8 2 as the last operand, which shows up as the scale in the expected
; addressing mode (%rdi,%ymm1,2).
; The generic and broadwell expectations carry no sched annotation on the
; gather; haswell and znver1 print `?` in the throughput slot.
; NOTE(review): the missing or `?` values presumably mean those scheduling
; models have no complete entry for this gather -- confirm.
2785define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
2786; GENERIC-LABEL: test_pgatherdd_ymm:
2787; GENERIC: # BB#0:
2788; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
2789; GENERIC-NEXT: retq # sched: [1:1.00]
2790;
2791; HASWELL-LABEL: test_pgatherdd_ymm:
2792; HASWELL: # BB#0:
2793; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?]
2794; HASWELL-NEXT: retq # sched: [2:1.00]
2795;
Gadi Haber85d99b42017-10-17 13:45:39 +00002796; BROADWELL-LABEL: test_pgatherdd_ymm:
2797; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002798; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
2799; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002800;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002801; SKYLAKE-LABEL: test_pgatherdd_ymm:
2802; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002803; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2804; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002805;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002806; SKX-LABEL: test_pgatherdd_ymm:
2807; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002808; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2809; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002810;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002811; ZNVER1-LABEL: test_pgatherdd_ymm:
2812; ZNVER1: # BB#0:
2813; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
2814; ZNVER1-NEXT: retq # sched: [1:0.50]
2815 %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2)
2816 ret <8 x i32> %1
2817}
2818declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly
2819
; Scheduling test for vpgatherdq with xmm operands. The single call to
; llvm.x86.avx2.gather.d.q passes i8 2 as its last argument, which shows up as
; the scale in the expected (%rdi,%xmm1,2) address. The generic and Broadwell
; expectations carry no sched annotation on the gather itself.
2820define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
2821; GENERIC-LABEL: test_pgatherdq:
2822; GENERIC: # BB#0:
2823; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
2824; GENERIC-NEXT: retq # sched: [1:1.00]
2825;
2826; HASWELL-LABEL: test_pgatherdq:
2827; HASWELL: # BB#0:
2828; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2829; HASWELL-NEXT: retq # sched: [2:1.00]
2830;
Gadi Haber85d99b42017-10-17 13:45:39 +00002831; BROADWELL-LABEL: test_pgatherdq:
2832; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002833; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
2834; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002835;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002836; SKYLAKE-LABEL: test_pgatherdq:
2837; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002838; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2839; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002840;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002841; SKX-LABEL: test_pgatherdq:
2842; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002843; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2844; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002845;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002846; ZNVER1-LABEL: test_pgatherdq:
2847; ZNVER1: # BB#0:
2848; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2849; ZNVER1-NEXT: retq # sched: [1:0.50]
2850 %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2)
2851 ret <2 x i64> %1
2852}
2853declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly
2854
; Scheduling test for vpgatherdq with a ymm result and mask but an xmm index
; register, as the expected "(%rdi,%xmm1,2), %ymm0" operands show. The single
; call to llvm.x86.avx2.gather.d.q.256 passes i8 2 as its last argument (the
; scale in the expected address). The generic and Broadwell expectations carry
; no sched annotation on the gather itself.
2855define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
2856; GENERIC-LABEL: test_pgatherdq_ymm:
2857; GENERIC: # BB#0:
2858; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
2859; GENERIC-NEXT: retq # sched: [1:1.00]
2860;
2861; HASWELL-LABEL: test_pgatherdq_ymm:
2862; HASWELL: # BB#0:
2863; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [1:?]
2864; HASWELL-NEXT: retq # sched: [2:1.00]
2865;
Gadi Haber85d99b42017-10-17 13:45:39 +00002866; BROADWELL-LABEL: test_pgatherdq_ymm:
2867; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002868; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
2869; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002870;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002871; SKYLAKE-LABEL: test_pgatherdq_ymm:
2872; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002873; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2874; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002875;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002876; SKX-LABEL: test_pgatherdq_ymm:
2877; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002878; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2879; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002880;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002881; ZNVER1-LABEL: test_pgatherdq_ymm:
2882; ZNVER1: # BB#0:
2883; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:?]
2884; ZNVER1-NEXT: retq # sched: [1:0.50]
2885 %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2)
2886 ret <4 x i64> %1
2887}
2888declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly
2889
; Scheduling test for vpgatherqd with xmm operands. The single call to
; llvm.x86.avx2.gather.q.d passes i8 2 as its last argument, which shows up as
; the scale in the expected (%rdi,%xmm1,2) address. The generic and Broadwell
; expectations carry no sched annotation on the gather itself.
2890define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
2891; GENERIC-LABEL: test_pgatherqd:
2892; GENERIC: # BB#0:
2893; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
2894; GENERIC-NEXT: retq # sched: [1:1.00]
2895;
2896; HASWELL-LABEL: test_pgatherqd:
2897; HASWELL: # BB#0:
2898; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2899; HASWELL-NEXT: retq # sched: [2:1.00]
2900;
Gadi Haber85d99b42017-10-17 13:45:39 +00002901; BROADWELL-LABEL: test_pgatherqd:
2902; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002903; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
2904; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002905;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002906; SKYLAKE-LABEL: test_pgatherqd:
2907; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002908; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2909; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002910;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002911; SKX-LABEL: test_pgatherqd:
2912; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002913; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2914; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002915;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002916; ZNVER1-LABEL: test_pgatherqd:
2917; ZNVER1: # BB#0:
2918; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2919; ZNVER1-NEXT: retq # sched: [1:0.50]
2920 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2)
2921 ret <4 x i32> %1
2922}
2923declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly
2924
; Scheduling test for vpgatherqd with a ymm index register but an xmm result and
; mask, as the expected "(%rdi,%ymm1,2), %xmm0" operands show. The single call
; to llvm.x86.avx2.gather.q.d.256 passes i8 2 as its last argument (the scale in
; the expected address). Unlike the other gather tests here, every CPU also
; expects a vzeroupper before the retq. The generic expectations carry no sched
; annotation on the gather or the vzeroupper; Broadwell carries none on the
; gather.
2925define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
2926; GENERIC-LABEL: test_pgatherqd_ymm:
2927; GENERIC: # BB#0:
2928; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
2929; GENERIC-NEXT: vzeroupper
2930; GENERIC-NEXT: retq # sched: [1:1.00]
2931;
2932; HASWELL-LABEL: test_pgatherqd_ymm:
2933; HASWELL: # BB#0:
2934; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [1:?]
2935; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
2936; HASWELL-NEXT: retq # sched: [2:1.00]
2937;
Gadi Haber85d99b42017-10-17 13:45:39 +00002938; BROADWELL-LABEL: test_pgatherqd_ymm:
2939; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002940; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
Gadi Haber85d99b42017-10-17 13:45:39 +00002941; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002942; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002943;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002944; SKYLAKE-LABEL: test_pgatherqd_ymm:
2945; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002946; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002947; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002948; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002949;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002950; SKX-LABEL: test_pgatherqd_ymm:
2951; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002952; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002953; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002954; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002955;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002956; ZNVER1-LABEL: test_pgatherqd_ymm:
2957; ZNVER1: # BB#0:
2958; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:?]
2959; ZNVER1-NEXT: vzeroupper # sched: [100:?]
2960; ZNVER1-NEXT: retq # sched: [1:0.50]
2961 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2)
2962 ret <4 x i32> %1
2963}
2964declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly
2965
; Scheduling test for vpgatherqq with xmm operands. The single call to
; llvm.x86.avx2.gather.q.q passes i8 2 as its last argument, which shows up as
; the scale in the expected (%rdi,%xmm1,2) address. The generic and Broadwell
; expectations carry no sched annotation on the gather itself.
2966define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
2967; GENERIC-LABEL: test_pgatherqq:
2968; GENERIC: # BB#0:
2969; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
2970; GENERIC-NEXT: retq # sched: [1:1.00]
2971;
2972; HASWELL-LABEL: test_pgatherqq:
2973; HASWELL: # BB#0:
2974; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2975; HASWELL-NEXT: retq # sched: [2:1.00]
2976;
Gadi Haber85d99b42017-10-17 13:45:39 +00002977; BROADWELL-LABEL: test_pgatherqq:
2978; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00002979; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
2980; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002981;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002982; SKYLAKE-LABEL: test_pgatherqq:
2983; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002984; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2985; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002986;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002987; SKX-LABEL: test_pgatherqq:
2988; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00002989; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2990; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002991;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002992; ZNVER1-LABEL: test_pgatherqq:
2993; ZNVER1: # BB#0:
2994; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2995; ZNVER1-NEXT: retq # sched: [1:0.50]
2996 %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
2997 ret <2 x i64> %1
2998}
2999declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly
3000
; Scheduling test for vpgatherqq with ymm operands. The single call to
; llvm.x86.avx2.gather.q.q.256 passes i8 2 as its last argument, which shows up
; as the scale in the expected (%rdi,%ymm1,2) address. The generic and Broadwell
; expectations carry no sched annotation on the gather itself.
3001define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
3002; GENERIC-LABEL: test_pgatherqq_ymm:
3003; GENERIC: # BB#0:
3004; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
3005; GENERIC-NEXT: retq # sched: [1:1.00]
3006;
3007; HASWELL-LABEL: test_pgatherqq_ymm:
3008; HASWELL: # BB#0:
3009; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?]
3010; HASWELL-NEXT: retq # sched: [2:1.00]
3011;
Gadi Haber85d99b42017-10-17 13:45:39 +00003012; BROADWELL-LABEL: test_pgatherqq_ymm:
3013; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003014; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
3015; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003016;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003017; SKYLAKE-LABEL: test_pgatherqq_ymm:
3018; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003019; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3020; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003021;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003022; SKX-LABEL: test_pgatherqq_ymm:
3023; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003024; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3025; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003026;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003027; ZNVER1-LABEL: test_pgatherqq_ymm:
3028; ZNVER1: # BB#0:
3029; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
3030; ZNVER1-NEXT: retq # sched: [1:0.50]
3031 %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
3032 ret <4 x i64> %1
3033}
3034declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly
3035
; Scheduling test for vphaddd on ymm registers. llvm.x86.avx2.phadd.d is called
; twice: first on the two register arguments, then on that result and a vector
; loaded from %a2 (align 32), so the expectations cover both the
; register-register form and the (%rdi) memory-operand form.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003036define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3037; GENERIC-LABEL: test_phaddd:
3038; GENERIC: # BB#0:
3039; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3040; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3041; GENERIC-NEXT: retq # sched: [1:1.00]
3042;
3043; HASWELL-LABEL: test_phaddd:
3044; HASWELL: # BB#0:
3045; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3046; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3047; HASWELL-NEXT: retq # sched: [2:1.00]
3048;
Gadi Haber85d99b42017-10-17 13:45:39 +00003049; BROADWELL-LABEL: test_phaddd:
3050; BROADWELL: # BB#0:
3051; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003052; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3053; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003054;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003055; SKYLAKE-LABEL: test_phaddd:
3056; SKYLAKE: # BB#0:
3057; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003058; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3059; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003060;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003061; SKX-LABEL: test_phaddd:
3062; SKX: # BB#0:
3063; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003064; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3065; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003066;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003067; ZNVER1-LABEL: test_phaddd:
3068; ZNVER1: # BB#0:
3069; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:?]
3070; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:?]
3071; ZNVER1-NEXT: retq # sched: [1:0.50]
3072 %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
3073 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3074 %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2)
3075 ret <8 x i32> %3
3076}
3077declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
3078
; Scheduling test for vphaddsw on ymm registers. llvm.x86.avx2.phadd.sw is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2 (align 32), so the expectations cover both the
; register-register form and the (%rdi) memory-operand form.
3079define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3080; GENERIC-LABEL: test_phaddsw:
3081; GENERIC: # BB#0:
3082; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3083; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3084; GENERIC-NEXT: retq # sched: [1:1.00]
3085;
3086; HASWELL-LABEL: test_phaddsw:
3087; HASWELL: # BB#0:
3088; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3089; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3090; HASWELL-NEXT: retq # sched: [2:1.00]
3091;
Gadi Haber85d99b42017-10-17 13:45:39 +00003092; BROADWELL-LABEL: test_phaddsw:
3093; BROADWELL: # BB#0:
3094; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003095; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3096; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003097;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003098; SKYLAKE-LABEL: test_phaddsw:
3099; SKYLAKE: # BB#0:
3100; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003101; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3102; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003103;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003104; SKX-LABEL: test_phaddsw:
3105; SKX: # BB#0:
3106; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003107; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3108; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003109;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003110; ZNVER1-LABEL: test_phaddsw:
3111; ZNVER1: # BB#0:
3112; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3113; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3114; ZNVER1-NEXT: retq # sched: [1:0.50]
3115 %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
3116 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3117 %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2)
3118 ret <16 x i16> %3
3119}
3120declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
3121
; Scheduling test for vphaddw on ymm registers. llvm.x86.avx2.phadd.w is called
; twice: first on the two register arguments, then on that result and a vector
; loaded from %a2 (align 32), so the expectations cover both the
; register-register form and the (%rdi) memory-operand form.
3122define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3123; GENERIC-LABEL: test_phaddw:
3124; GENERIC: # BB#0:
3125; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3126; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3127; GENERIC-NEXT: retq # sched: [1:1.00]
3128;
3129; HASWELL-LABEL: test_phaddw:
3130; HASWELL: # BB#0:
3131; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3132; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3133; HASWELL-NEXT: retq # sched: [2:1.00]
3134;
Gadi Haber85d99b42017-10-17 13:45:39 +00003135; BROADWELL-LABEL: test_phaddw:
3136; BROADWELL: # BB#0:
3137; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003138; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3139; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003140;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003141; SKYLAKE-LABEL: test_phaddw:
3142; SKYLAKE: # BB#0:
3143; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003144; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3145; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003146;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003147; SKX-LABEL: test_phaddw:
3148; SKX: # BB#0:
3149; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003150; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3151; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003152;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003153; ZNVER1-LABEL: test_phaddw:
3154; ZNVER1: # BB#0:
3155; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3156; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3157; ZNVER1-NEXT: retq # sched: [1:0.50]
3158 %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
3159 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3160 %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2)
3161 ret <16 x i16> %3
3162}
3163declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
3164
; Scheduling test for vphsubd on ymm registers. llvm.x86.avx2.phsub.d is called
; twice: first on the two register arguments, then on that result and a vector
; loaded from %a2 (align 32), so the expectations cover both the
; register-register form and the (%rdi) memory-operand form.
3165define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3166; GENERIC-LABEL: test_phsubd:
3167; GENERIC: # BB#0:
3168; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3169; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3170; GENERIC-NEXT: retq # sched: [1:1.00]
3171;
3172; HASWELL-LABEL: test_phsubd:
3173; HASWELL: # BB#0:
3174; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3175; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3176; HASWELL-NEXT: retq # sched: [2:1.00]
3177;
Gadi Haber85d99b42017-10-17 13:45:39 +00003178; BROADWELL-LABEL: test_phsubd:
3179; BROADWELL: # BB#0:
3180; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003181; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3182; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003183;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003184; SKYLAKE-LABEL: test_phsubd:
3185; SKYLAKE: # BB#0:
3186; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003187; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3188; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003189;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003190; SKX-LABEL: test_phsubd:
3191; SKX: # BB#0:
3192; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003193; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3194; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003195;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003196; ZNVER1-LABEL: test_phsubd:
3197; ZNVER1: # BB#0:
3198; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:?]
3199; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:?]
3200; ZNVER1-NEXT: retq # sched: [1:0.50]
3201 %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
3202 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3203 %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2)
3204 ret <8 x i32> %3
3205}
3206declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
3207
; Scheduling test for vphsubsw on ymm registers. llvm.x86.avx2.phsub.sw is
; called twice: first on the two register arguments, then on that result and a
; vector loaded from %a2 (align 32), so the expectations cover both the
; register-register form and the (%rdi) memory-operand form.
3208define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3209; GENERIC-LABEL: test_phsubsw:
3210; GENERIC: # BB#0:
3211; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3212; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3213; GENERIC-NEXT: retq # sched: [1:1.00]
3214;
3215; HASWELL-LABEL: test_phsubsw:
3216; HASWELL: # BB#0:
3217; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3218; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3219; HASWELL-NEXT: retq # sched: [2:1.00]
3220;
Gadi Haber85d99b42017-10-17 13:45:39 +00003221; BROADWELL-LABEL: test_phsubsw:
3222; BROADWELL: # BB#0:
3223; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003224; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3225; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003226;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003227; SKYLAKE-LABEL: test_phsubsw:
3228; SKYLAKE: # BB#0:
3229; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003230; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3231; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003232;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003233; SKX-LABEL: test_phsubsw:
3234; SKX: # BB#0:
3235; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003236; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3237; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003238;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003239; ZNVER1-LABEL: test_phsubsw:
3240; ZNVER1: # BB#0:
3241; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3242; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3243; ZNVER1-NEXT: retq # sched: [1:0.50]
3244 %1 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
3245 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3246 %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2)
3247 ret <16 x i16> %3
3248}
3249declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
3250
; Scheduling test for vphsubw on ymm registers. llvm.x86.avx2.phsub.w is called
; twice: first on the two register arguments, then on that result and a vector
; loaded from %a2 (align 32), so the expectations cover both the
; register-register form and the (%rdi) memory-operand form.
3251define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3252; GENERIC-LABEL: test_phsubw:
3253; GENERIC: # BB#0:
3254; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3255; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3256; GENERIC-NEXT: retq # sched: [1:1.00]
3257;
3258; HASWELL-LABEL: test_phsubw:
3259; HASWELL: # BB#0:
3260; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3261; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3262; HASWELL-NEXT: retq # sched: [2:1.00]
3263;
Gadi Haber85d99b42017-10-17 13:45:39 +00003264; BROADWELL-LABEL: test_phsubw:
3265; BROADWELL: # BB#0:
3266; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003267; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3268; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003269;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003270; SKYLAKE-LABEL: test_phsubw:
3271; SKYLAKE: # BB#0:
3272; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003273; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3274; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003275;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003276; SKX-LABEL: test_phsubw:
3277; SKX: # BB#0:
3278; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003279; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3280; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003281;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003282; ZNVER1-LABEL: test_phsubw:
3283; ZNVER1: # BB#0:
3284; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3285; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3286; ZNVER1-NEXT: retq # sched: [1:0.50]
3287 %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
3288 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3289 %3 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2)
3290 ret <16 x i16> %3
3291}
3292declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
3293
; Scheduling test for vpmaddubsw on ymm registers. llvm.x86.avx2.pmadd.ub.sw is
; called twice: first on the two register arguments, then on that result
; (bitcast from <16 x i16> back to <32 x i8> so it fits the intrinsic's operand
; type) and a vector loaded from %a2 (align 32). The expectations therefore
; cover both the register-register form and the (%rdi) memory-operand form.
3294define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3295; GENERIC-LABEL: test_pmaddubsw:
3296; GENERIC: # BB#0:
3297; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3298; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
3299; GENERIC-NEXT: retq # sched: [1:1.00]
3300;
3301; HASWELL-LABEL: test_pmaddubsw:
3302; HASWELL: # BB#0:
3303; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3304; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
3305; HASWELL-NEXT: retq # sched: [2:1.00]
3306;
Gadi Haber85d99b42017-10-17 13:45:39 +00003307; BROADWELL-LABEL: test_pmaddubsw:
3308; BROADWELL: # BB#0:
3309; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003310; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3311; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003312;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003313; SKYLAKE-LABEL: test_pmaddubsw:
3314; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003315; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003316; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3317; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003318;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003319; SKX-LABEL: test_pmaddubsw:
3320; SKX: # BB#0:
3321; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003322; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3323; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003324;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003325; ZNVER1-LABEL: test_pmaddubsw:
3326; ZNVER1: # BB#0:
3327; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3328; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3329; ZNVER1-NEXT: retq # sched: [1:0.50]
3330 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
3331 %2 = bitcast <16 x i16> %1 to <32 x i8>
3332 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
3333 %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x i8> %3)
3334 ret <16 x i16> %4
3335}
3336declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
3337
; Scheduling test for vpmaddwd on ymm registers. llvm.x86.avx2.pmadd.wd is
; called twice: first on the two register arguments, then on that result
; (bitcast from <8 x i32> back to <16 x i16> so it fits the intrinsic's operand
; type) and a vector loaded from %a2 (align 32). The expectations therefore
; cover both the register-register form and the (%rdi) memory-operand form.
3338define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3339; GENERIC-LABEL: test_pmaddwd:
3340; GENERIC: # BB#0:
3341; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3342; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
3343; GENERIC-NEXT: retq # sched: [1:1.00]
3344;
3345; HASWELL-LABEL: test_pmaddwd:
3346; HASWELL: # BB#0:
3347; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3348; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
3349; HASWELL-NEXT: retq # sched: [2:1.00]
3350;
Gadi Haber85d99b42017-10-17 13:45:39 +00003351; BROADWELL-LABEL: test_pmaddwd:
3352; BROADWELL: # BB#0:
3353; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003354; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3355; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003356;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003357; SKYLAKE-LABEL: test_pmaddwd:
3358; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003359; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003360; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3361; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003362;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003363; SKX-LABEL: test_pmaddwd:
3364; SKX: # BB#0:
3365; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003366; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3367; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003368;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003369; ZNVER1-LABEL: test_pmaddwd:
3370; ZNVER1: # BB#0:
3371; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3372; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3373; ZNVER1-NEXT: retq # sched: [1:0.50]
3374 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
3375 %2 = bitcast <8 x i32> %1 to <16 x i16>
3376 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
3377 %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3)
3378 ret <8 x i32> %4
3379}
3380declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
3381
; Scheduling test for both forms of vpmaskmovd on xmm registers. The body calls
; llvm.x86.avx2.maskload.d and then llvm.x86.avx2.maskstore.d on the same
; pointer %a0, both with %a1 as the second operand, and returns the loaded
; value. The expectations are the load form into xmm2, the store form, and a
; vmovdqa copying xmm2 into xmm0 before the retq. The generic expectations carry
; no sched annotation on either vpmaskmovd line.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003382define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
3383; GENERIC-LABEL: test_pmaskmovd:
3384; GENERIC: # BB#0:
3385; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
3386; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
3387; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
3388; GENERIC-NEXT: retq # sched: [1:1.00]
3389;
3390; HASWELL-LABEL: test_pmaskmovd:
3391; HASWELL: # BB#0:
3392; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
3393; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
3394; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3395; HASWELL-NEXT: retq # sched: [2:1.00]
3396;
Gadi Haber85d99b42017-10-17 13:45:39 +00003397; BROADWELL-LABEL: test_pmaskmovd:
3398; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003399; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3400; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003401; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +00003402; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003403;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003404; SKYLAKE-LABEL: test_pmaskmovd:
3405; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003406; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3407; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003408; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003409; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003410;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003411; SKX-LABEL: test_pmaskmovd:
3412; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003413; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3414; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003415; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00003416; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003417;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003418; ZNVER1-LABEL: test_pmaskmovd:
3419; ZNVER1: # BB#0:
3420; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:?]
3421; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:?]
3422; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3423; ZNVER1-NEXT: retq # sched: [1:0.50]
3424 %1 = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1)
3425 call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
3426 ret <4 x i32> %1
3427}
3428declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
3429declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
3430
; test_pmaskmovd_ymm: scheduling-annotation test for the ymm forms of vpmaskmovd.
; The IR calls @llvm.x86.avx2.maskload.d.256 on (%a0, %a1), then
; @llvm.x86.avx2.maskstore.d.256 on (%a0, %a1, %a2), and returns the loaded
; value. For every CPU prefix the checks expect, in order: the load form of
; vpmaskmovd, the store form, a vmovdqa copy of %ymm2 into %ymm0, and retq, each
; followed by the "# sched" annotation recorded for that CPU (the GENERIC
; vpmaskmovd lines carry no annotation).
; NOTE(review): ZNVER1 records [100:?] for both vpmaskmovd forms, which looks
; like a placeholder rather than a modelled value - confirm against the znver1
; scheduling model.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3431define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
3432; GENERIC-LABEL: test_pmaskmovd_ymm:
3433; GENERIC: # BB#0:
3434; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2
3435; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi)
3436; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3437; GENERIC-NEXT: retq # sched: [1:1.00]
3438;
3439; HASWELL-LABEL: test_pmaskmovd_ymm:
3440; HASWELL: # BB#0:
3441; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
3442; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
3443; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3444; HASWELL-NEXT: retq # sched: [2:1.00]
3445;
Gadi Haber85d99b42017-10-17 13:45:39 +00003446; BROADWELL-LABEL: test_pmaskmovd_ymm:
3447; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003448; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3449; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003450; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +00003451; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003452;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003453; SKYLAKE-LABEL: test_pmaskmovd_ymm:
3454; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003455; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3456; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003457; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003458; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003459;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003460; SKX-LABEL: test_pmaskmovd_ymm:
3461; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003462; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3463; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003464; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00003465; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003466;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003467; ZNVER1-LABEL: test_pmaskmovd_ymm:
3468; ZNVER1: # BB#0:
3469; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:?]
3470; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:?]
3471; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3472; ZNVER1-NEXT: retq # sched: [1:0.50]
3473 %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1)
3474 call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
3475 ret <8 x i32> %1
3476}
; Declarations of the two intrinsics called above: the load is declared
; nounwind readonly, the store nounwind only.
3477declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
3478declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
3479
; test_pmaskmovq: scheduling-annotation test for the xmm forms of vpmaskmovq.
; The IR calls @llvm.x86.avx2.maskload.q on (%a0, %a1), then
; @llvm.x86.avx2.maskstore.q on (%a0, %a1, %a2), and returns the loaded value.
; For every CPU prefix the checks expect, in order: the load form of vpmaskmovq,
; the store form, a vmovdqa copy of %xmm2 into %xmm0, and retq, each followed by
; the "# sched" annotation recorded for that CPU (the GENERIC vpmaskmovq lines
; carry no annotation).
; NOTE(review): ZNVER1 records [8:1.00] for the load form but [100:?] for the
; store form; the latter looks like a placeholder rather than a modelled value -
; confirm against the znver1 scheduling model.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3480define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
3481; GENERIC-LABEL: test_pmaskmovq:
3482; GENERIC: # BB#0:
3483; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2
3484; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi)
3485; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
3486; GENERIC-NEXT: retq # sched: [1:1.00]
3487;
3488; HASWELL-LABEL: test_pmaskmovq:
3489; HASWELL: # BB#0:
3490; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
3491; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
3492; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3493; HASWELL-NEXT: retq # sched: [2:1.00]
3494;
Gadi Haber85d99b42017-10-17 13:45:39 +00003495; BROADWELL-LABEL: test_pmaskmovq:
3496; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003497; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3498; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003499; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +00003500; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003501;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003502; SKYLAKE-LABEL: test_pmaskmovq:
3503; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003504; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3505; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003506; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003507; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003508;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003509; SKX-LABEL: test_pmaskmovq:
3510; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003511; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3512; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003513; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00003514; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003515;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003516; ZNVER1-LABEL: test_pmaskmovq:
3517; ZNVER1: # BB#0:
3518; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
3519; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:?]
3520; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3521; ZNVER1-NEXT: retq # sched: [1:0.50]
3522 %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1)
3523 call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
3524 ret <2 x i64> %1
3525}
; Declarations of the two intrinsics called above: the load is declared
; nounwind readonly, the store nounwind only.
3526declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
3527declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
3528
; test_pmaskmovq_ymm: scheduling-annotation test for the ymm forms of vpmaskmovq.
; The IR calls @llvm.x86.avx2.maskload.q.256 on (%a0, %a1), then
; @llvm.x86.avx2.maskstore.q.256 on (%a0, %a1, %a2), and returns the loaded
; value. For every CPU prefix the checks expect, in order: the load form of
; vpmaskmovq, the store form, a vmovdqa copy of %ymm2 into %ymm0, and retq, each
; followed by the "# sched" annotation recorded for that CPU (the GENERIC
; vpmaskmovq lines carry no annotation).
; NOTE(review): ZNVER1 records [9:1.50] for the load form but [100:?] for the
; store form; the latter looks like a placeholder rather than a modelled value -
; confirm against the znver1 scheduling model.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3529define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
3530; GENERIC-LABEL: test_pmaskmovq_ymm:
3531; GENERIC: # BB#0:
3532; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2
3533; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi)
3534; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3535; GENERIC-NEXT: retq # sched: [1:1.00]
3536;
3537; HASWELL-LABEL: test_pmaskmovq_ymm:
3538; HASWELL: # BB#0:
3539; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
3540; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
3541; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3542; HASWELL-NEXT: retq # sched: [2:1.00]
3543;
Gadi Haber85d99b42017-10-17 13:45:39 +00003544; BROADWELL-LABEL: test_pmaskmovq_ymm:
3545; BROADWELL: # BB#0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003546; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3547; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003548; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber323f2e12017-10-24 20:19:47 +00003549; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003550;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003551; SKYLAKE-LABEL: test_pmaskmovq_ymm:
3552; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003553; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3554; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003555; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003556; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003557;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003558; SKX-LABEL: test_pmaskmovq_ymm:
3559; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003560; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3561; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003562; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
Gadi Haber684944b2017-10-08 12:52:54 +00003563; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003564;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003565; ZNVER1-LABEL: test_pmaskmovq_ymm:
3566; ZNVER1: # BB#0:
3567; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50]
3568; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:?]
3569; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3570; ZNVER1-NEXT: retq # sched: [1:0.50]
3571 %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1)
3572 call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
3573 ret <4 x i64> %1
3574}
; Declarations of the two intrinsics called above: the load is declared
; nounwind readonly, the store nounwind only.
3575declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
3576declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
3577
; test_pmaxsb: scheduling-annotation test for the ymm forms of vpmaxsb.
; @llvm.x86.avx2.pmaxs.b is called twice: on (%a0, %a1), then on that result and
; a <32 x i8> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpmaxsb, then a vpmaxsb with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003578define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3579; GENERIC-LABEL: test_pmaxsb:
3580; GENERIC: # BB#0:
3581; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3582; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3583; GENERIC-NEXT: retq # sched: [1:1.00]
3584;
3585; HASWELL-LABEL: test_pmaxsb:
3586; HASWELL: # BB#0:
3587; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3588; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3589; HASWELL-NEXT: retq # sched: [2:1.00]
3590;
Gadi Haber85d99b42017-10-17 13:45:39 +00003591; BROADWELL-LABEL: test_pmaxsb:
3592; BROADWELL: # BB#0:
3593; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003594; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3595; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003596;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003597; SKYLAKE-LABEL: test_pmaxsb:
3598; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003599; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3600; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3601; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003602;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003603; SKX-LABEL: test_pmaxsb:
3604; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003605; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3606; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3607; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003608;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003609; ZNVER1-LABEL: test_pmaxsb:
3610; ZNVER1: # BB#0:
3611; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3612; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3613; ZNVER1-NEXT: retq # sched: [1:0.50]
3614 %1 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
3615 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3616 %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2)
3617 ret <32 x i8> %3
3618}
; Declaration of the intrinsic called above (nounwind readnone).
3619declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
3620
; test_pmaxsd: scheduling-annotation test for the ymm forms of vpmaxsd.
; @llvm.x86.avx2.pmaxs.d is called twice: on (%a0, %a1), then on that result and
; a <8 x i32> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpmaxsd, then a vpmaxsd with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3621define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3622; GENERIC-LABEL: test_pmaxsd:
3623; GENERIC: # BB#0:
3624; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3625; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3626; GENERIC-NEXT: retq # sched: [1:1.00]
3627;
3628; HASWELL-LABEL: test_pmaxsd:
3629; HASWELL: # BB#0:
3630; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3631; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3632; HASWELL-NEXT: retq # sched: [2:1.00]
3633;
Gadi Haber85d99b42017-10-17 13:45:39 +00003634; BROADWELL-LABEL: test_pmaxsd:
3635; BROADWELL: # BB#0:
3636; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003637; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3638; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003639;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003640; SKYLAKE-LABEL: test_pmaxsd:
3641; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003642; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3643; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3644; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003645;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003646; SKX-LABEL: test_pmaxsd:
3647; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003648; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3649; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3650; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003651;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003652; ZNVER1-LABEL: test_pmaxsd:
3653; ZNVER1: # BB#0:
3654; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3655; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3656; ZNVER1-NEXT: retq # sched: [1:0.50]
3657 %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
3658 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3659 %3 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2)
3660 ret <8 x i32> %3
3661}
; Declaration of the intrinsic called above (nounwind readnone).
3662declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
3663
; test_pmaxsw: scheduling-annotation test for the ymm forms of vpmaxsw.
; @llvm.x86.avx2.pmaxs.w is called twice: on (%a0, %a1), then on that result and
; a <16 x i16> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpmaxsw, then a vpmaxsw with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3664define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3665; GENERIC-LABEL: test_pmaxsw:
3666; GENERIC: # BB#0:
3667; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3668; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3669; GENERIC-NEXT: retq # sched: [1:1.00]
3670;
3671; HASWELL-LABEL: test_pmaxsw:
3672; HASWELL: # BB#0:
3673; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3674; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3675; HASWELL-NEXT: retq # sched: [2:1.00]
3676;
Gadi Haber85d99b42017-10-17 13:45:39 +00003677; BROADWELL-LABEL: test_pmaxsw:
3678; BROADWELL: # BB#0:
3679; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003680; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3681; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003682;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003683; SKYLAKE-LABEL: test_pmaxsw:
3684; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003685; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3686; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3687; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003688;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003689; SKX-LABEL: test_pmaxsw:
3690; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003691; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3692; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3693; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003694;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003695; ZNVER1-LABEL: test_pmaxsw:
3696; ZNVER1: # BB#0:
3697; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3698; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3699; ZNVER1-NEXT: retq # sched: [1:0.50]
3700 %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
3701 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3702 %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2)
3703 ret <16 x i16> %3
3704}
; Declaration of the intrinsic called above (nounwind readnone).
3705declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
3706
; test_pmaxub: scheduling-annotation test for the ymm forms of vpmaxub.
; @llvm.x86.avx2.pmaxu.b is called twice: on (%a0, %a1), then on that result and
; a <32 x i8> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpmaxub, then a vpmaxub with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3707define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3708; GENERIC-LABEL: test_pmaxub:
3709; GENERIC: # BB#0:
3710; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3711; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3712; GENERIC-NEXT: retq # sched: [1:1.00]
3713;
3714; HASWELL-LABEL: test_pmaxub:
3715; HASWELL: # BB#0:
3716; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3717; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3718; HASWELL-NEXT: retq # sched: [2:1.00]
3719;
Gadi Haber85d99b42017-10-17 13:45:39 +00003720; BROADWELL-LABEL: test_pmaxub:
3721; BROADWELL: # BB#0:
3722; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003723; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3724; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003725;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003726; SKYLAKE-LABEL: test_pmaxub:
3727; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003728; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3729; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3730; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003731;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003732; SKX-LABEL: test_pmaxub:
3733; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003734; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3735; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3736; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003737;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003738; ZNVER1-LABEL: test_pmaxub:
3739; ZNVER1: # BB#0:
3740; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3741; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3742; ZNVER1-NEXT: retq # sched: [1:0.50]
3743 %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
3744 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3745 %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2)
3746 ret <32 x i8> %3
3747}
; Declaration of the intrinsic called above (nounwind readnone).
3748declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
3749
; test_pmaxud: scheduling-annotation test for the ymm forms of vpmaxud.
; @llvm.x86.avx2.pmaxu.d is called twice: on (%a0, %a1), then on that result and
; a <8 x i32> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpmaxud, then a vpmaxud with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3750define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3751; GENERIC-LABEL: test_pmaxud:
3752; GENERIC: # BB#0:
3753; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3754; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3755; GENERIC-NEXT: retq # sched: [1:1.00]
3756;
3757; HASWELL-LABEL: test_pmaxud:
3758; HASWELL: # BB#0:
3759; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3760; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3761; HASWELL-NEXT: retq # sched: [2:1.00]
3762;
Gadi Haber85d99b42017-10-17 13:45:39 +00003763; BROADWELL-LABEL: test_pmaxud:
3764; BROADWELL: # BB#0:
3765; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003766; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3767; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003768;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003769; SKYLAKE-LABEL: test_pmaxud:
3770; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003771; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3772; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3773; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003774;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003775; SKX-LABEL: test_pmaxud:
3776; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003777; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3778; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3779; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003780;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003781; ZNVER1-LABEL: test_pmaxud:
3782; ZNVER1: # BB#0:
3783; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3784; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3785; ZNVER1-NEXT: retq # sched: [1:0.50]
3786 %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
3787 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3788 %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2)
3789 ret <8 x i32> %3
3790}
; Declaration of the intrinsic called above (nounwind readnone).
3791declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
3792
; test_pmaxuw: scheduling-annotation test for the ymm forms of vpmaxuw.
; @llvm.x86.avx2.pmaxu.w is called twice: on (%a0, %a1), then on that result and
; a <16 x i16> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpmaxuw, then a vpmaxuw with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3793define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3794; GENERIC-LABEL: test_pmaxuw:
3795; GENERIC: # BB#0:
3796; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3797; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3798; GENERIC-NEXT: retq # sched: [1:1.00]
3799;
3800; HASWELL-LABEL: test_pmaxuw:
3801; HASWELL: # BB#0:
3802; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3803; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3804; HASWELL-NEXT: retq # sched: [2:1.00]
3805;
Gadi Haber85d99b42017-10-17 13:45:39 +00003806; BROADWELL-LABEL: test_pmaxuw:
3807; BROADWELL: # BB#0:
3808; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003809; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3810; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003811;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003812; SKYLAKE-LABEL: test_pmaxuw:
3813; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003814; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3815; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3816; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003817;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003818; SKX-LABEL: test_pmaxuw:
3819; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003820; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3821; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3822; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003823;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003824; ZNVER1-LABEL: test_pmaxuw:
3825; ZNVER1: # BB#0:
3826; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3827; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3828; ZNVER1-NEXT: retq # sched: [1:0.50]
3829 %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
3830 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3831 %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2)
3832 ret <16 x i16> %3
3833}
; Declaration of the intrinsic called above (nounwind readnone).
3834declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
3835
; test_pminsb: scheduling-annotation test for the ymm forms of vpminsb.
; @llvm.x86.avx2.pmins.b is called twice: on (%a0, %a1), then on that result and
; a <32 x i8> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpminsb, then a vpminsb with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3836define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3837; GENERIC-LABEL: test_pminsb:
3838; GENERIC: # BB#0:
3839; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3840; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3841; GENERIC-NEXT: retq # sched: [1:1.00]
3842;
3843; HASWELL-LABEL: test_pminsb:
3844; HASWELL: # BB#0:
3845; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3846; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3847; HASWELL-NEXT: retq # sched: [2:1.00]
3848;
Gadi Haber85d99b42017-10-17 13:45:39 +00003849; BROADWELL-LABEL: test_pminsb:
3850; BROADWELL: # BB#0:
3851; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003852; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3853; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003854;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003855; SKYLAKE-LABEL: test_pminsb:
3856; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003857; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3858; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3859; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003860;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003861; SKX-LABEL: test_pminsb:
3862; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003863; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3864; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3865; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003866;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003867; ZNVER1-LABEL: test_pminsb:
3868; ZNVER1: # BB#0:
3869; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3870; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3871; ZNVER1-NEXT: retq # sched: [1:0.50]
3872 %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
3873 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3874 %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2)
3875 ret <32 x i8> %3
3876}
; Declaration of the intrinsic called above (nounwind readnone).
3877declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
3878
; test_pminsd: scheduling-annotation test for the ymm forms of vpminsd.
; @llvm.x86.avx2.pmins.d is called twice: on (%a0, %a1), then on that result and
; a <8 x i32> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpminsd, then a vpminsd with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3879define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3880; GENERIC-LABEL: test_pminsd:
3881; GENERIC: # BB#0:
3882; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3883; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3884; GENERIC-NEXT: retq # sched: [1:1.00]
3885;
3886; HASWELL-LABEL: test_pminsd:
3887; HASWELL: # BB#0:
3888; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3889; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3890; HASWELL-NEXT: retq # sched: [2:1.00]
3891;
Gadi Haber85d99b42017-10-17 13:45:39 +00003892; BROADWELL-LABEL: test_pminsd:
3893; BROADWELL: # BB#0:
3894; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003895; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3896; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003897;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003898; SKYLAKE-LABEL: test_pminsd:
3899; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003900; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3901; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3902; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003903;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003904; SKX-LABEL: test_pminsd:
3905; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003906; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3907; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3908; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003909;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003910; ZNVER1-LABEL: test_pminsd:
3911; ZNVER1: # BB#0:
3912; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3913; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3914; ZNVER1-NEXT: retq # sched: [1:0.50]
3915 %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
3916 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3917 %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2)
3918 ret <8 x i32> %3
3919}
; Declaration of the intrinsic called above (nounwind readnone).
3920declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
3921
; test_pminsw: scheduling-annotation test for the ymm forms of vpminsw.
; @llvm.x86.avx2.pmins.w is called twice: on (%a0, %a1), then on that result and
; a <16 x i16> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpminsw, then a vpminsw with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3922define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3923; GENERIC-LABEL: test_pminsw:
3924; GENERIC: # BB#0:
3925; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3926; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3927; GENERIC-NEXT: retq # sched: [1:1.00]
3928;
3929; HASWELL-LABEL: test_pminsw:
3930; HASWELL: # BB#0:
3931; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3932; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3933; HASWELL-NEXT: retq # sched: [2:1.00]
3934;
Gadi Haber85d99b42017-10-17 13:45:39 +00003935; BROADWELL-LABEL: test_pminsw:
3936; BROADWELL: # BB#0:
3937; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003938; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3939; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003940;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003941; SKYLAKE-LABEL: test_pminsw:
3942; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003943; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3944; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3945; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003946;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003947; SKX-LABEL: test_pminsw:
3948; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003949; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3950; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3951; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003952;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003953; ZNVER1-LABEL: test_pminsw:
3954; ZNVER1: # BB#0:
3955; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3956; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3957; ZNVER1-NEXT: retq # sched: [1:0.50]
3958 %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
3959 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3960 %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2)
3961 ret <16 x i16> %3
3962}
; Declaration of the intrinsic called above (nounwind readnone).
3963declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
3964
; test_pminub: scheduling-annotation test for the ymm forms of vpminub.
; @llvm.x86.avx2.pminu.b is called twice: on (%a0, %a1), then on that result and
; a <32 x i8> loaded from %a2 (align 32). For every CPU prefix the checks expect
; a register-register vpminub, then a vpminub with a (%rdi) memory operand, then
; retq, each followed by the "# sched" annotation recorded for that CPU.
; NOTE(review): HASWELL records the same [1:0.50] for the memory form as for the
; register form, while the other CPUs record a larger first field for the memory
; form - confirm whether the Haswell model accounts for the load here.
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
3965define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3966; GENERIC-LABEL: test_pminub:
3967; GENERIC: # BB#0:
3968; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3969; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3970; GENERIC-NEXT: retq # sched: [1:1.00]
3971;
3972; HASWELL-LABEL: test_pminub:
3973; HASWELL: # BB#0:
3974; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3975; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3976; HASWELL-NEXT: retq # sched: [2:1.00]
3977;
Gadi Haber85d99b42017-10-17 13:45:39 +00003978; BROADWELL-LABEL: test_pminub:
3979; BROADWELL: # BB#0:
3980; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003981; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3982; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003983;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003984; SKYLAKE-LABEL: test_pminub:
3985; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003986; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3987; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3988; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003989;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003990; SKX-LABEL: test_pminub:
3991; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00003992; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3993; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3994; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003995;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003996; ZNVER1-LABEL: test_pminub:
3997; ZNVER1: # BB#0:
3998; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3999; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4000; ZNVER1-NEXT: retq # sched: [1:0.50]
4001 %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
4002 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
4003 %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2)
4004 ret <32 x i8> %3
4005}
; Declaration of the intrinsic called above (nounwind readnone).
4006declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
4007
; Scheduling test for vpminud on 256-bit (<8 x i32>) operands.
; The IR calls @llvm.x86.avx2.pminu.d twice: first on the two register
; arguments, then on that result and a vector loaded from %a2. The assertions
; therefore list a register-register form followed by a (%rdi) memory form,
; each with its "sched: [latency:throughput]" annotation, for all six CPU
; check prefixes.
4008define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4009; GENERIC-LABEL: test_pminud:
4010; GENERIC: # BB#0:
4011; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4012; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
4013; GENERIC-NEXT: retq # sched: [1:1.00]
4014;
4015; HASWELL-LABEL: test_pminud:
4016; HASWELL: # BB#0:
4017; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4018; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
4019; HASWELL-NEXT: retq # sched: [2:1.00]
4020;
Gadi Haber85d99b42017-10-17 13:45:39 +00004021; BROADWELL-LABEL: test_pminud:
4022; BROADWELL: # BB#0:
4023; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004024; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4025; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004026;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004027; SKYLAKE-LABEL: test_pminud:
4028; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004029; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4030; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4031; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004032;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004033; SKX-LABEL: test_pminud:
4034; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00004035; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4036; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4037; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004038;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004039; ZNVER1-LABEL: test_pminud:
4040; ZNVER1: # BB#0:
4041; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4042; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4043; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: register-register call, 32-byte-aligned load, then a call that consumes the loaded value.
4044 %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
4045 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4046 %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2)
4047 ret <8 x i32> %3
4048}
4049declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
4050
; Scheduling test for vpminuw on 256-bit (<16 x i16>) operands.
; The IR calls @llvm.x86.avx2.pminu.w twice: first on the two register
; arguments, then on that result and a vector loaded from %a2. The assertions
; therefore list a register-register form followed by a (%rdi) memory form,
; each with its "sched: [latency:throughput]" annotation, for all six CPU
; check prefixes.
4051define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4052; GENERIC-LABEL: test_pminuw:
4053; GENERIC: # BB#0:
4054; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4055; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
4056; GENERIC-NEXT: retq # sched: [1:1.00]
4057;
4058; HASWELL-LABEL: test_pminuw:
4059; HASWELL: # BB#0:
4060; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4061; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
4062; HASWELL-NEXT: retq # sched: [2:1.00]
4063;
Gadi Haber85d99b42017-10-17 13:45:39 +00004064; BROADWELL-LABEL: test_pminuw:
4065; BROADWELL: # BB#0:
4066; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004067; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4068; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004069;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004070; SKYLAKE-LABEL: test_pminuw:
4071; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004072; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4073; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4074; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004075;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004076; SKX-LABEL: test_pminuw:
4077; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00004078; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4079; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4080; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004081;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004082; ZNVER1-LABEL: test_pminuw:
4083; ZNVER1: # BB#0:
4084; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4085; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4086; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: register-register call, 32-byte-aligned load, then a call that consumes the loaded value.
4087 %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
4088 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4089 %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2)
4090 ret <16 x i16> %3
4091}
4092declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
4093
; Scheduling test for vpmovmskb with a 256-bit (<32 x i8>) source and an i32
; result. The IR is a single call to @llvm.x86.avx2.pmovmskb with no memory
; operand, so only the register form (%ymm0 -> %eax) is asserted. Every CPU
; prefix also expects a vzeroupper before the return; the generic run's
; vzeroupper line carries no sched annotation, and the znver1 run annotates
; it as [100:?].
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004094define i32 @test_pmovmskb(<32 x i8> %a0) {
4095; GENERIC-LABEL: test_pmovmskb:
4096; GENERIC: # BB#0:
4097; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [1:1.00]
4098; GENERIC-NEXT: vzeroupper
4099; GENERIC-NEXT: retq # sched: [1:1.00]
4100;
4101; HASWELL-LABEL: test_pmovmskb:
4102; HASWELL: # BB#0:
4103; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4104; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
4105; HASWELL-NEXT: retq # sched: [2:1.00]
4106;
Gadi Haber85d99b42017-10-17 13:45:39 +00004107; BROADWELL-LABEL: test_pmovmskb:
4108; BROADWELL: # BB#0:
4109; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4110; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004111; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004112;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004113; SKYLAKE-LABEL: test_pmovmskb:
4114; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004115; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004116; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004117; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004118;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004119; SKX-LABEL: test_pmovmskb:
4120; SKX: # BB#0:
4121; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4122; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004123; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004124;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004125; ZNVER1-LABEL: test_pmovmskb:
4126; ZNVER1: # BB#0:
4127; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4128; ZNVER1-NEXT: vzeroupper # sched: [100:?]
4129; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: one intrinsic call on the register argument; the i32 result is returned directly.
4130 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
4131 ret i32 %1
4132}
4133declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
4134
; Scheduling test for vpmovsxbd (xmm/mem source, ymm destination).
; The IR uses no intrinsic: it takes elements 0-7 of a <16 x i8> with
; shufflevector and sign-extends them to <8 x i32>, once for the register
; argument %a0 and once for a vector loaded from %a1, then adds the two
; results. The assertions expect a register form, a (%rdi) memory form and a
; vpaddd; the znver1 assertions list the memory form first.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004135define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4136; GENERIC-LABEL: test_pmovsxbd:
4137; GENERIC: # BB#0:
4138; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
4139; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [5:1.00]
4140; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4141; GENERIC-NEXT: retq # sched: [1:1.00]
4142;
4143; HASWELL-LABEL: test_pmovsxbd:
4144; HASWELL: # BB#0:
4145; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
4146; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00]
4147; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4148; HASWELL-NEXT: retq # sched: [2:1.00]
4149;
Gadi Haber85d99b42017-10-17 13:45:39 +00004150; BROADWELL-LABEL: test_pmovsxbd:
4151; BROADWELL: # BB#0:
4152; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004153; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004154; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004155; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004156;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004157; SKYLAKE-LABEL: test_pmovsxbd:
4158; SKYLAKE: # BB#0:
4159; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004160; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4161; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4162; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004163;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004164; SKX-LABEL: test_pmovsxbd:
4165; SKX: # BB#0:
4166; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004167; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4168; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4169; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004170;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004171; ZNVER1-LABEL: test_pmovsxbd:
4172; ZNVER1: # BB#0:
4173; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50]
4174; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.25]
4175; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4176; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: low-8-byte extract + sext of the register argument, the same for the loaded vector, then an add that keeps both extensions live.
4177 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4178 %2 = sext <8 x i8> %1 to <8 x i32>
4179 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4180 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4181 %5 = sext <8 x i8> %4 to <8 x i32>
4182 %6 = add <8 x i32> %2, %5
4183 ret <8 x i32> %6
4184}
4185
; Scheduling test for vpmovsxbq (xmm/mem source, ymm destination).
; The IR uses no intrinsic: it takes elements 0-3 of a <16 x i8> with
; shufflevector and sign-extends them to <4 x i64>, once for the register
; argument %a0 and once for a vector loaded from %a1, then adds the two
; results. The assertions expect a register form, a (%rdi) memory form and a
; vpaddq; the znver1 assertions list the memory form first.
4186define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4187; GENERIC-LABEL: test_pmovsxbq:
4188; GENERIC: # BB#0:
4189; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
4190; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [5:1.00]
4191; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4192; GENERIC-NEXT: retq # sched: [1:1.00]
4193;
4194; HASWELL-LABEL: test_pmovsxbq:
4195; HASWELL: # BB#0:
4196; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
4197; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00]
4198; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4199; HASWELL-NEXT: retq # sched: [2:1.00]
4200;
Gadi Haber85d99b42017-10-17 13:45:39 +00004201; BROADWELL-LABEL: test_pmovsxbq:
4202; BROADWELL: # BB#0:
4203; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004204; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004205; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004206; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004207;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004208; SKYLAKE-LABEL: test_pmovsxbq:
4209; SKYLAKE: # BB#0:
4210; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004211; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4212; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4213; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004214;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004215; SKX-LABEL: test_pmovsxbq:
4216; SKX: # BB#0:
4217; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004218; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4219; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4220; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004221;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004222; ZNVER1-LABEL: test_pmovsxbq:
4223; ZNVER1: # BB#0:
4224; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50]
4225; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50]
4226; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4227; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: low-4-byte extract + sext of the register argument, the same for the loaded vector, then an add that keeps both extensions live.
4228 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4229 %2 = sext <4 x i8> %1 to <4 x i64>
4230 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4231 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4232 %5 = sext <4 x i8> %4 to <4 x i64>
4233 %6 = add <4 x i64> %2, %5
4234 ret <4 x i64> %6
4235}
4236
; Scheduling test for vpmovsxbw (xmm/mem source, ymm destination).
; The IR uses no intrinsic and no shuffle: it sign-extends a full <16 x i8>
; to <16 x i16>, once for the register argument %a0 and once for a vector
; loaded from %a1, then adds the two results. The assertions expect a
; register form, a (%rdi) memory form and a vpaddw; the znver1 assertions
; list the memory form first.
4237define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4238; GENERIC-LABEL: test_pmovsxbw:
4239; GENERIC: # BB#0:
4240; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
4241; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [5:1.00]
4242; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4243; GENERIC-NEXT: retq # sched: [1:1.00]
4244;
4245; HASWELL-LABEL: test_pmovsxbw:
4246; HASWELL: # BB#0:
4247; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
4248; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00]
4249; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4250; HASWELL-NEXT: retq # sched: [2:1.00]
4251;
Gadi Haber85d99b42017-10-17 13:45:39 +00004252; BROADWELL-LABEL: test_pmovsxbw:
4253; BROADWELL: # BB#0:
4254; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004255; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004256; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004257; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004258;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004259; SKYLAKE-LABEL: test_pmovsxbw:
4260; SKYLAKE: # BB#0:
4261; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004262; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4263; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4264; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004265;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004266; SKX-LABEL: test_pmovsxbw:
4267; SKX: # BB#0:
4268; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004269; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4270; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4271; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004272;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004273; ZNVER1-LABEL: test_pmovsxbw:
4274; ZNVER1: # BB#0:
4275; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50]
4276; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50]
4277; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4278; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: sext of the register argument, sext of the 16-byte-aligned load, then an add that keeps both extensions live.
4279 %1 = sext <16 x i8> %a0 to <16 x i16>
4280 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4281 %3 = sext <16 x i8> %2 to <16 x i16>
4282 %4 = add <16 x i16> %1, %3
4283 ret <16 x i16> %4
4284}
4285
; Scheduling test for vpmovsxdq (xmm/mem source, ymm destination).
; The IR uses no intrinsic and no shuffle: it sign-extends a full <4 x i32>
; to <4 x i64>, once for the register argument %a0 and once for a vector
; loaded from %a1, then adds the two results. The assertions expect a
; register form, a (%rdi) memory form and a vpaddq; the znver1 assertions
; list the memory form first.
4286define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4287; GENERIC-LABEL: test_pmovsxdq:
4288; GENERIC: # BB#0:
4289; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
4290; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [5:1.00]
4291; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4292; GENERIC-NEXT: retq # sched: [1:1.00]
4293;
4294; HASWELL-LABEL: test_pmovsxdq:
4295; HASWELL: # BB#0:
4296; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4297; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00]
4298; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4299; HASWELL-NEXT: retq # sched: [2:1.00]
4300;
Gadi Haber85d99b42017-10-17 13:45:39 +00004301; BROADWELL-LABEL: test_pmovsxdq:
4302; BROADWELL: # BB#0:
4303; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004304; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004305; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004306; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004307;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004308; SKYLAKE-LABEL: test_pmovsxdq:
4309; SKYLAKE: # BB#0:
4310; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004311; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4312; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4313; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004314;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004315; SKX-LABEL: test_pmovsxdq:
4316; SKX: # BB#0:
4317; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004318; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4319; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4320; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004321;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004322; ZNVER1-LABEL: test_pmovsxdq:
4323; ZNVER1: # BB#0:
4324; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50]
4325; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50]
4326; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4327; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: sext of the register argument, sext of the 16-byte-aligned load, then an add that keeps both extensions live.
4328 %1 = sext <4 x i32> %a0 to <4 x i64>
4329 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4330 %3 = sext <4 x i32> %2 to <4 x i64>
4331 %4 = add <4 x i64> %1, %3
4332 ret <4 x i64> %4
4333}
4334
; Scheduling test for vpmovsxwd (xmm/mem source, ymm destination).
; The IR uses no intrinsic and no shuffle: it sign-extends a full <8 x i16>
; to <8 x i32>, once for the register argument %a0 and once for a vector
; loaded from %a1, then adds the two results. The assertions expect a
; register form, a (%rdi) memory form and a vpaddd; the znver1 assertions
; list the memory form first.
4335define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4336; GENERIC-LABEL: test_pmovsxwd:
4337; GENERIC: # BB#0:
4338; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
4339; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [5:1.00]
4340; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4341; GENERIC-NEXT: retq # sched: [1:1.00]
4342;
4343; HASWELL-LABEL: test_pmovsxwd:
4344; HASWELL: # BB#0:
4345; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
4346; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00]
4347; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4348; HASWELL-NEXT: retq # sched: [2:1.00]
4349;
Gadi Haber85d99b42017-10-17 13:45:39 +00004350; BROADWELL-LABEL: test_pmovsxwd:
4351; BROADWELL: # BB#0:
4352; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004353; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004354; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004355; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004356;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004357; SKYLAKE-LABEL: test_pmovsxwd:
4358; SKYLAKE: # BB#0:
4359; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004360; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4361; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4362; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004363;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004364; SKX-LABEL: test_pmovsxwd:
4365; SKX: # BB#0:
4366; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004367; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4368; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4369; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004370;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004371; ZNVER1-LABEL: test_pmovsxwd:
4372; ZNVER1: # BB#0:
4373; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50]
4374; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.25]
4375; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4376; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: sext of the register argument, sext of the 16-byte-aligned load, then an add that keeps both extensions live.
4377 %1 = sext <8 x i16> %a0 to <8 x i32>
4378 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4379 %3 = sext <8 x i16> %2 to <8 x i32>
4380 %4 = add <8 x i32> %1, %3
4381 ret <8 x i32> %4
4382}
4383
; Scheduling test for vpmovsxwq (xmm/mem source, ymm destination).
; The IR uses no intrinsic: it takes elements 0-3 of a <8 x i16> with
; shufflevector and sign-extends them to <4 x i64>, once for the register
; argument %a0 and once for a vector loaded from %a1, then adds the two
; results. The assertions expect a register form, a (%rdi) memory form and a
; vpaddq; the znver1 assertions list the memory form first.
4384define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4385; GENERIC-LABEL: test_pmovsxwq:
4386; GENERIC: # BB#0:
4387; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
4388; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [5:1.00]
4389; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4390; GENERIC-NEXT: retq # sched: [1:1.00]
4391;
4392; HASWELL-LABEL: test_pmovsxwq:
4393; HASWELL: # BB#0:
4394; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
4395; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00]
4396; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4397; HASWELL-NEXT: retq # sched: [2:1.00]
4398;
Gadi Haber85d99b42017-10-17 13:45:39 +00004399; BROADWELL-LABEL: test_pmovsxwq:
4400; BROADWELL: # BB#0:
4401; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004402; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004403; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004404; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004405;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004406; SKYLAKE-LABEL: test_pmovsxwq:
4407; SKYLAKE: # BB#0:
4408; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004409; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4410; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4411; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004412;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004413; SKX-LABEL: test_pmovsxwq:
4414; SKX: # BB#0:
4415; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004416; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4417; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4418; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004419;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004420; ZNVER1-LABEL: test_pmovsxwq:
4421; ZNVER1: # BB#0:
4422; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50]
4423; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.25]
4424; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4425; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: low-4-element extract + sext of the register argument, the same for the loaded vector, then an add that keeps both extensions live.
4426 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4427 %2 = sext <4 x i16> %1 to <4 x i64>
4428 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4429 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4430 %5 = sext <4 x i16> %4 to <4 x i64>
4431 %6 = add <4 x i64> %2, %5
4432 ret <4 x i64> %6
4433}
4434
; Scheduling test for vpmovzxbd (xmm/mem source, ymm destination).
; The IR uses no intrinsic: it takes elements 0-7 of a <16 x i8> with
; shufflevector and zero-extends them to <8 x i32>, once for the register
; argument %a0 and once for a vector loaded from %a1, then adds the two
; results. The assertions expect a register form, a memory form and a vpaddd;
; the operand text is matched through a regex followed by the printed shuffle
; decode (source byte, then three "zero" entries per element). The znver1
; assertions list the memory form first.
4435define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4436; GENERIC-LABEL: test_pmovzxbd:
4437; GENERIC: # BB#0:
4438; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
4439; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
4440; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4441; GENERIC-NEXT: retq # sched: [1:1.00]
4442;
4443; HASWELL-LABEL: test_pmovzxbd:
4444; HASWELL: # BB#0:
4445; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4446; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00]
4447; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4448; HASWELL-NEXT: retq # sched: [2:1.00]
4449;
Gadi Haber85d99b42017-10-17 13:45:39 +00004450; BROADWELL-LABEL: test_pmovzxbd:
4451; BROADWELL: # BB#0:
4452; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004453; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004454; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004455; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004456;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004457; SKYLAKE-LABEL: test_pmovzxbd:
4458; SKYLAKE: # BB#0:
4459; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004460; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4461; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4462; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004463;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004464; SKX-LABEL: test_pmovzxbd:
4465; SKX: # BB#0:
4466; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004467; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4468; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4469; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004470;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004471; ZNVER1-LABEL: test_pmovzxbd:
4472; ZNVER1: # BB#0:
4473; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50]
4474; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.25]
4475; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4476; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: low-8-byte extract + zext of the register argument, the same for the loaded vector, then an add that keeps both extensions live.
4477 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4478 %2 = zext <8 x i8> %1 to <8 x i32>
4479 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4480 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4481 %5 = zext <8 x i8> %4 to <8 x i32>
4482 %6 = add <8 x i32> %2, %5
4483 ret <8 x i32> %6
4484}
4485
; Scheduling test for vpmovzxbq (xmm/mem source, ymm destination).
; The IR uses no intrinsic: it takes elements 0-3 of a <16 x i8> with
; shufflevector and zero-extends them to <4 x i64>, once for the register
; argument %a0 and once for a vector loaded from %a1, then adds the two
; results. The assertions expect a register form, a memory form and a vpaddq;
; the operand text is matched through a regex followed by the printed shuffle
; decode (source byte, then seven "zero" entries per element). The znver1
; assertions list the memory form first.
4486define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4487; GENERIC-LABEL: test_pmovzxbq:
4488; GENERIC: # BB#0:
4489; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
4490; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
4491; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4492; GENERIC-NEXT: retq # sched: [1:1.00]
4493;
4494; HASWELL-LABEL: test_pmovzxbq:
4495; HASWELL: # BB#0:
4496; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4497; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4498; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4499; HASWELL-NEXT: retq # sched: [2:1.00]
4500;
Gadi Haber85d99b42017-10-17 13:45:39 +00004501; BROADWELL-LABEL: test_pmovzxbq:
4502; BROADWELL: # BB#0:
4503; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004504; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004505; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004506; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004507;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004508; SKYLAKE-LABEL: test_pmovzxbq:
4509; SKYLAKE: # BB#0:
4510; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004511; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4512; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4513; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004514;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004515; SKX-LABEL: test_pmovzxbq:
4516; SKX: # BB#0:
4517; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004518; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4519; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4520; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004521;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004522; ZNVER1-LABEL: test_pmovzxbq:
4523; ZNVER1: # BB#0:
4524; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
4525; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
4526; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4527; ZNVER1-NEXT: retq # sched: [1:0.50]
; Test body: low-4-byte extract + zext of the register argument, the same for the loaded vector, then an add that keeps both extensions live.
4528 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4529 %2 = zext <4 x i8> %1 to <4 x i64>
4530 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4531 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4532 %5 = zext <4 x i8> %4 to <4 x i64>
4533 %6 = add <4 x i64> %2, %5
4534 ret <4 x i64> %6
4535}
4536
; test_pmovzxbw: zero-extends <16 x i8> to <16 x i16> twice - once from the
; register argument %a0 and once from a vector loaded through %a1 - and adds
; the two results. The expected assembly therefore contains vpmovzxbw with an
; xmm0 source and with a memory source, followed by vpaddw, each carrying the
; "sched: [..]" annotation produced by -print-schedule.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
4537define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4538; GENERIC-LABEL: test_pmovzxbw:
4539; GENERIC: # BB#0:
4540; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4541; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
4542; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4543; GENERIC-NEXT: retq # sched: [1:1.00]
4544;
4545; HASWELL-LABEL: test_pmovzxbw:
4546; HASWELL: # BB#0:
4547; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4548; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00]
4549; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4550; HASWELL-NEXT: retq # sched: [2:1.00]
4551;
Gadi Haber85d99b42017-10-17 13:45:39 +00004552; BROADWELL-LABEL: test_pmovzxbw:
4553; BROADWELL: # BB#0:
4554; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004555; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004556; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004557; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004558;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004559; SKYLAKE-LABEL: test_pmovzxbw:
4560; SKYLAKE: # BB#0:
4561; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004562; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4563; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4564; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004565;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004566; SKX-LABEL: test_pmovzxbw:
4567; SKX: # BB#0:
4568; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004569; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4570; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4571; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004572;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004573; ZNVER1-LABEL: test_pmovzxbw:
4574; ZNVER1: # BB#0:
4575; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50]
4576; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50]
4577; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4578; ZNVER1-NEXT: retq # sched: [1:0.50]
4579 %1 = zext <16 x i8> %a0 to <16 x i16>
4580 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4581 %3 = zext <16 x i8> %2 to <16 x i16>
4582 %4 = add <16 x i16> %1, %3
4583 ret <16 x i16> %4
4584}
4585
; test_pmovzxdq: zero-extends <4 x i32> to <4 x i64> from the register argument
; %a0 and from a vector loaded through %a1, then adds the two results. The
; expected assembly contains vpmovzxdq with an xmm0 source and with a memory
; source, followed by vpaddq, each with its -print-schedule "sched: [..]"
; annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
4586define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4587; GENERIC-LABEL: test_pmovzxdq:
4588; GENERIC: # BB#0:
4589; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
4590; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
4591; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4592; GENERIC-NEXT: retq # sched: [1:1.00]
4593;
4594; HASWELL-LABEL: test_pmovzxdq:
4595; HASWELL: # BB#0:
4596; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4597; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00]
4598; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4599; HASWELL-NEXT: retq # sched: [2:1.00]
4600;
Gadi Haber85d99b42017-10-17 13:45:39 +00004601; BROADWELL-LABEL: test_pmovzxdq:
4602; BROADWELL: # BB#0:
4603; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004604; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004605; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004606; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004607;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004608; SKYLAKE-LABEL: test_pmovzxdq:
4609; SKYLAKE: # BB#0:
4610; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004611; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4612; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4613; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004614;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004615; SKX-LABEL: test_pmovzxdq:
4616; SKX: # BB#0:
4617; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004618; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4619; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4620; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004621;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004622; ZNVER1-LABEL: test_pmovzxdq:
4623; ZNVER1: # BB#0:
4624; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
4625; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
4626; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4627; ZNVER1-NEXT: retq # sched: [1:0.50]
4628 %1 = zext <4 x i32> %a0 to <4 x i64>
4629 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4630 %3 = zext <4 x i32> %2 to <4 x i64>
4631 %4 = add <4 x i64> %1, %3
4632 ret <4 x i64> %4
4633}
4634
; test_pmovzxwd: zero-extends <8 x i16> to <8 x i32> from the register argument
; %a0 and from a vector loaded through %a1, then adds the two results. The
; expected assembly contains vpmovzxwd with an xmm0 source and with a memory
; source, followed by vpaddd, each with its -print-schedule "sched: [..]"
; annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
4635define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4636; GENERIC-LABEL: test_pmovzxwd:
4637; GENERIC: # BB#0:
4638; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
4639; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
4640; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4641; GENERIC-NEXT: retq # sched: [1:1.00]
4642;
4643; HASWELL-LABEL: test_pmovzxwd:
4644; HASWELL: # BB#0:
4645; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4646; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00]
4647; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4648; HASWELL-NEXT: retq # sched: [2:1.00]
4649;
Gadi Haber85d99b42017-10-17 13:45:39 +00004650; BROADWELL-LABEL: test_pmovzxwd:
4651; BROADWELL: # BB#0:
4652; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004653; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004654; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004655; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004656;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004657; SKYLAKE-LABEL: test_pmovzxwd:
4658; SKYLAKE: # BB#0:
4659; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004660; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4661; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4662; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004663;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004664; SKX-LABEL: test_pmovzxwd:
4665; SKX: # BB#0:
4666; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004667; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4668; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4669; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004670;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004671; ZNVER1-LABEL: test_pmovzxwd:
4672; ZNVER1: # BB#0:
4673; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
4674; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
4675; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4676; ZNVER1-NEXT: retq # sched: [1:0.50]
4677 %1 = zext <8 x i16> %a0 to <8 x i32>
4678 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4679 %3 = zext <8 x i16> %2 to <8 x i32>
4680 %4 = add <8 x i32> %1, %3
4681 ret <8 x i32> %4
4682}
4683
; test_pmovzxwq: takes the four lowest i16 elements (shuffle mask 0..3) of the
; register argument %a0 and of a vector loaded through %a1, zero-extends each
; <4 x i16> to <4 x i64>, and adds the two results. The expected assembly
; contains vpmovzxwq with an xmm0 source and with a memory source, followed by
; vpaddq, each with its -print-schedule "sched: [..]" annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
4684define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4685; GENERIC-LABEL: test_pmovzxwq:
4686; GENERIC: # BB#0:
4687; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
4688; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
4689; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4690; GENERIC-NEXT: retq # sched: [1:1.00]
4691;
4692; HASWELL-LABEL: test_pmovzxwq:
4693; HASWELL: # BB#0:
4694; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4695; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [3:1.00]
4696; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4697; HASWELL-NEXT: retq # sched: [2:1.00]
4698;
Gadi Haber85d99b42017-10-17 13:45:39 +00004699; BROADWELL-LABEL: test_pmovzxwq:
4700; BROADWELL: # BB#0:
4701; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004702; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004703; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004704; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004705;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004706; SKYLAKE-LABEL: test_pmovzxwq:
4707; SKYLAKE: # BB#0:
4708; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004709; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4710; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4711; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004712;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004713; SKX-LABEL: test_pmovzxwq:
4714; SKX: # BB#0:
4715; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004716; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4717; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4718; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004719;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004720; ZNVER1-LABEL: test_pmovzxwq:
4721; ZNVER1: # BB#0:
4722; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
4723; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
4724; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4725; ZNVER1-NEXT: retq # sched: [1:0.50]
4726 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4727 %2 = zext <4 x i16> %1 to <4 x i64>
4728 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4729 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4730 %5 = zext <4 x i16> %4 to <4 x i64>
4731 %6 = add <4 x i64> %2, %5
4732 ret <4 x i64> %6
4733}
4734
; test_pmuldq: calls the @llvm.x86.avx2.pmul.dq intrinsic twice - first on the
; two register arguments, then on that result and a vector loaded through %a2.
; The expected assembly contains vpmuldq once with register operands and once
; with a (%rdi) memory operand, each with its -print-schedule "sched: [..]"
; annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
4735define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4736; GENERIC-LABEL: test_pmuldq:
4737; GENERIC: # BB#0:
4738; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4739; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4740; GENERIC-NEXT: retq # sched: [1:1.00]
4741;
4742; HASWELL-LABEL: test_pmuldq:
4743; HASWELL: # BB#0:
4744; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4745; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4746; HASWELL-NEXT: retq # sched: [2:1.00]
4747;
Gadi Haber85d99b42017-10-17 13:45:39 +00004748; BROADWELL-LABEL: test_pmuldq:
4749; BROADWELL: # BB#0:
4750; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004751; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4752; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004753;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004754; SKYLAKE-LABEL: test_pmuldq:
4755; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004756; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004757; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4758; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004759;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004760; SKX-LABEL: test_pmuldq:
4761; SKX: # BB#0:
4762; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004763; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4764; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004765;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004766; ZNVER1-LABEL: test_pmuldq:
4767; ZNVER1: # BB#0:
4768; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4769; ZNVER1-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4770; ZNVER1-NEXT: retq # sched: [1:0.50]
; The intrinsic returns <4 x i64> but takes <8 x i32> operands, so the first
; result is bitcast back to <8 x i32> before it feeds the second call.
4771 %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
4772 %2 = bitcast <4 x i64> %1 to <8 x i32>
4773 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
4774 %4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3)
4775 ret <4 x i64> %4
4776}
4777declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
4778
; test_pmulhrsw: calls the @llvm.x86.avx2.pmul.hr.sw intrinsic twice - first on
; the two register arguments, then on that result and a vector loaded through
; %a2. The expected assembly contains vpmulhrsw once with register operands and
; once with a (%rdi) memory operand, each with its -print-schedule
; "sched: [..]" annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
4779define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4780; GENERIC-LABEL: test_pmulhrsw:
4781; GENERIC: # BB#0:
4782; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4783; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4784; GENERIC-NEXT: retq # sched: [1:1.00]
4785;
4786; HASWELL-LABEL: test_pmulhrsw:
4787; HASWELL: # BB#0:
4788; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4789; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4790; HASWELL-NEXT: retq # sched: [2:1.00]
4791;
Gadi Haber85d99b42017-10-17 13:45:39 +00004792; BROADWELL-LABEL: test_pmulhrsw:
4793; BROADWELL: # BB#0:
4794; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004795; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4796; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004797;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004798; SKYLAKE-LABEL: test_pmulhrsw:
4799; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004800; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004801; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4802; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004803;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004804; SKX-LABEL: test_pmulhrsw:
4805; SKX: # BB#0:
4806; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004807; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4808; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004809;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004810; ZNVER1-LABEL: test_pmulhrsw:
4811; ZNVER1: # BB#0:
4812; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4813; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4814; ZNVER1-NEXT: retq # sched: [1:0.50]
4815 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
4816 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4817 %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2)
4818 ret <16 x i16> %3
4819}
4820declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
4821
; test_pmulhuw: calls the @llvm.x86.avx2.pmulhu.w intrinsic twice - first on the
; two register arguments, then on that result and a vector loaded through %a2.
; The expected assembly contains vpmulhuw once with register operands and once
; with a (%rdi) memory operand, each with its -print-schedule "sched: [..]"
; annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
4822define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4823; GENERIC-LABEL: test_pmulhuw:
4824; GENERIC: # BB#0:
4825; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4826; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4827; GENERIC-NEXT: retq # sched: [1:1.00]
4828;
4829; HASWELL-LABEL: test_pmulhuw:
4830; HASWELL: # BB#0:
4831; HASWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4832; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4833; HASWELL-NEXT: retq # sched: [2:1.00]
4834;
Gadi Haber85d99b42017-10-17 13:45:39 +00004835; BROADWELL-LABEL: test_pmulhuw:
4836; BROADWELL: # BB#0:
4837; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004838; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4839; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004840;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004841; SKYLAKE-LABEL: test_pmulhuw:
4842; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004843; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004844; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4845; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004846;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004847; SKX-LABEL: test_pmulhuw:
4848; SKX: # BB#0:
4849; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004850; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4851; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004852;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004853; ZNVER1-LABEL: test_pmulhuw:
4854; ZNVER1: # BB#0:
4855; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4856; ZNVER1-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4857; ZNVER1-NEXT: retq # sched: [1:0.50]
4858 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
4859 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4860 %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2)
4861 ret <16 x i16> %3
4862}
4863declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
4864
; test_pmulhw: calls the @llvm.x86.avx2.pmulh.w intrinsic twice - first on the
; two register arguments, then on that result and a vector loaded through %a2.
; The expected assembly contains vpmulhw once with register operands and once
; with a (%rdi) memory operand, each with its -print-schedule "sched: [..]"
; annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
4865define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4866; GENERIC-LABEL: test_pmulhw:
4867; GENERIC: # BB#0:
4868; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4869; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4870; GENERIC-NEXT: retq # sched: [1:1.00]
4871;
4872; HASWELL-LABEL: test_pmulhw:
4873; HASWELL: # BB#0:
4874; HASWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4875; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4876; HASWELL-NEXT: retq # sched: [2:1.00]
4877;
Gadi Haber85d99b42017-10-17 13:45:39 +00004878; BROADWELL-LABEL: test_pmulhw:
4879; BROADWELL: # BB#0:
4880; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004881; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4882; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004883;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004884; SKYLAKE-LABEL: test_pmulhw:
4885; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004886; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004887; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4888; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004889;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004890; SKX-LABEL: test_pmulhw:
4891; SKX: # BB#0:
4892; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004893; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4894; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004895;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004896; ZNVER1-LABEL: test_pmulhw:
4897; ZNVER1: # BB#0:
4898; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4899; ZNVER1-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4900; ZNVER1-NEXT: retq # sched: [1:0.50]
4901 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
4902 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4903 %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2)
4904 ret <16 x i16> %3
4905}
4906declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
4907
; test_pmulld: multiplies the two <8 x i32> register arguments with a plain IR
; `mul`, then multiplies that product by a vector loaded through %a2. The
; expected assembly contains vpmulld once with register operands and once with
; a (%rdi) memory operand, each with its -print-schedule "sched: [..]"
; annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004908define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00004909; GENERIC-LABEL: test_pmulld:
4910; GENERIC: # BB#0:
4911; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4912; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4913; GENERIC-NEXT: retq # sched: [1:1.00]
4914;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004915; HASWELL-LABEL: test_pmulld:
4916; HASWELL: # BB#0:
4917; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
4918; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00004919; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004920;
Gadi Haber85d99b42017-10-17 13:45:39 +00004921; BROADWELL-LABEL: test_pmulld:
4922; BROADWELL: # BB#0:
4923; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004924; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
4925; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004926;
Gadi Haber767d98b2017-08-30 08:08:50 +00004927; SKYLAKE-LABEL: test_pmulld:
4928; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004929; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004930; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
4931; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00004932;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004933; SKX-LABEL: test_pmulld:
4934; SKX: # BB#0:
4935; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
Gadi Haber684944b2017-10-08 12:52:54 +00004936; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
4937; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004938;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004939; ZNVER1-LABEL: test_pmulld:
4940; ZNVER1: # BB#0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00004941; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
4942; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
4943; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004944 %1 = mul <8 x i32> %a0, %a1
4945 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4946 %3 = mul <8 x i32> %1, %2
4947 ret <8 x i32> %3
4948}
4949
; test_pmullw: multiplies the two <16 x i16> register arguments with a plain IR
; `mul`, then multiplies that product by a vector loaded through %a2. The
; expected assembly contains vpmullw once with register operands and once with
; a (%rdi) memory operand, each with its -print-schedule "sched: [..]"
; annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
4950define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00004951; GENERIC-LABEL: test_pmullw:
4952; GENERIC: # BB#0:
4953; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4954; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4955; GENERIC-NEXT: retq # sched: [1:1.00]
4956;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004957; HASWELL-LABEL: test_pmullw:
4958; HASWELL: # BB#0:
4959; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haberd76f7b82017-08-28 10:04:16 +00004960; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4961; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004962;
Gadi Haber85d99b42017-10-17 13:45:39 +00004963; BROADWELL-LABEL: test_pmullw:
4964; BROADWELL: # BB#0:
4965; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004966; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4967; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004968;
Gadi Haber767d98b2017-08-30 08:08:50 +00004969; SKYLAKE-LABEL: test_pmullw:
4970; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004971; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004972; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4973; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00004974;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004975; SKX-LABEL: test_pmullw:
4976; SKX: # BB#0:
4977; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004978; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4979; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004980;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004981; ZNVER1-LABEL: test_pmullw:
4982; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00004983; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4984; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00004985; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004986 %1 = mul <16 x i16> %a0, %a1
4987 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4988 %3 = mul <16 x i16> %1, %2
4989 ret <16 x i16> %3
4990}
4991
; test_pmuludq: calls the @llvm.x86.avx2.pmulu.dq intrinsic twice - first on the
; two register arguments, then on that result and a vector loaded through %a2.
; The expected assembly contains vpmuludq once with register operands and once
; with a (%rdi) memory operand, each with its -print-schedule "sched: [..]"
; annotation.
; The per-CPU assertion lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script instead of editing by hand.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004992define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4993; GENERIC-LABEL: test_pmuludq:
4994; GENERIC: # BB#0:
4995; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4996; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4997; GENERIC-NEXT: retq # sched: [1:1.00]
4998;
4999; HASWELL-LABEL: test_pmuludq:
5000; HASWELL: # BB#0:
5001; HASWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5002; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5003; HASWELL-NEXT: retq # sched: [2:1.00]
5004;
Gadi Haber85d99b42017-10-17 13:45:39 +00005005; BROADWELL-LABEL: test_pmuludq:
5006; BROADWELL: # BB#0:
5007; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005008; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5009; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005010;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005011; SKYLAKE-LABEL: test_pmuludq:
5012; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00005013; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005014; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
5015; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005016;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005017; SKX-LABEL: test_pmuludq:
5018; SKX: # BB#0:
5019; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00005020; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
5021; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005022;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005023; ZNVER1-LABEL: test_pmuludq:
5024; ZNVER1: # BB#0:
5025; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
5026; ZNVER1-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5027; ZNVER1-NEXT: retq # sched: [1:0.50]
; The intrinsic returns <4 x i64> but takes <8 x i32> operands, so the first
; result is bitcast back to <8 x i32> before it feeds the second call.
5028 %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
5029 %2 = bitcast <4 x i64> %1 to <8 x i32>
5030 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
5031 %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3)
5032 ret <4 x i64> %4
5033}
5034declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
5035
; Schedule test for vpor. The IR ors %a0 with %a1 (register form), ors the
; result with a value loaded from %a2 (memory form, expected as (%rdi) in every
; check block), and finally adds %a1, which shows up as the trailing vpaddq.
; NOTE(review) the trailing add presumably keeps the ors in the integer domain
; so that vpor is selected; SOURCE does not state this, confirm before relying on it.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005036define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00005037; GENERIC-LABEL: test_por:
5038; GENERIC: # BB#0:
5039; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5040; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5041; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5042; GENERIC-NEXT: retq # sched: [1:1.00]
5043;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005044; HASWELL-LABEL: test_por:
5045; HASWELL: # BB#0:
5046; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haberd76f7b82017-08-28 10:04:16 +00005047; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005048; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00005049; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005050;
Gadi Haber85d99b42017-10-17 13:45:39 +00005051; BROADWELL-LABEL: test_por:
5052; BROADWELL: # BB#0:
5053; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00005054; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00005055; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005056; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005057;
Gadi Haber767d98b2017-08-30 08:08:50 +00005058; SKYLAKE-LABEL: test_por:
5059; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005060; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5061; SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5062; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5063; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00005064;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005065; SKX-LABEL: test_por:
5066; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005067; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5068; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5069; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5070; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005071;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005072; ZNVER1-LABEL: test_por:
5073; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00005074; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5075; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5076; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00005077; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register or.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005078 %1 = or <4 x i64> %a0, %a1
; Or with a value loaded from %a2 (32-byte aligned).
5079 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5080 %3 = or <4 x i64> %1, %2
; Final integer add on %a1, matched by the vpaddq check lines above.
5081 %4 = add <4 x i64> %3, %a1
5082 ret <4 x i64> %4
5083}
5084
; Schedule test for vpsadbw. The first intrinsic call uses two register
; operands; the second takes its right-hand operand from a load of %a2, which
; every check block expects as the (%rdi) memory operand.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005085define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5086; GENERIC-LABEL: test_psadbw:
5087; GENERIC: # BB#0:
5088; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5089; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5090; GENERIC-NEXT: retq # sched: [1:1.00]
5091;
5092; HASWELL-LABEL: test_psadbw:
5093; HASWELL: # BB#0:
5094; HASWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5095; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5096; HASWELL-NEXT: retq # sched: [2:1.00]
5097;
Gadi Haber85d99b42017-10-17 13:45:39 +00005098; BROADWELL-LABEL: test_psadbw:
5099; BROADWELL: # BB#0:
5100; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005101; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5102; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005103;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005104; SKYLAKE-LABEL: test_psadbw:
5105; SKYLAKE: # BB#0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00005106; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005107; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
5108; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005109;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005110; SKX-LABEL: test_psadbw:
5111; SKX: # BB#0:
5112; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005113; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
5114; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005115;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005116; ZNVER1-LABEL: test_psadbw:
5117; ZNVER1: # BB#0:
5118; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
5119; ZNVER1-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5120; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register form.
5121 %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
; Reinterpret the <4 x i64> result as <32 x i8> so it can feed the second call.
5122 %2 = bitcast <4 x i64> %1 to <32 x i8>
; Memory form, fed by a 32-byte aligned load of %a2.
5123 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
5124 %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3)
5125 ret <4 x i64> %4
5126}
5127declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
5128
; Schedule test for vpshufb. The first intrinsic call takes its shuffle
; control from a register (%a1); the second takes it from a load of %a2, which
; every check block expects as the (%rdi) memory operand.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005129define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5130; GENERIC-LABEL: test_pshufb:
5131; GENERIC: # BB#0:
5132; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5133; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5134; GENERIC-NEXT: retq # sched: [1:1.00]
5135;
5136; HASWELL-LABEL: test_pshufb:
5137; HASWELL: # BB#0:
5138; HASWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5139; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5140; HASWELL-NEXT: retq # sched: [2:1.00]
5141;
Gadi Haber85d99b42017-10-17 13:45:39 +00005142; BROADWELL-LABEL: test_pshufb:
5143; BROADWELL: # BB#0:
5144; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005145; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5146; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005147;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005148; SKYLAKE-LABEL: test_pshufb:
5149; SKYLAKE: # BB#0:
5150; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005151; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5152; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005153;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005154; SKX-LABEL: test_pshufb:
5155; SKX: # BB#0:
5156; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005157; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5158; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005159;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005160; ZNVER1-LABEL: test_pshufb:
5161; ZNVER1: # BB#0:
5162; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5163; ZNVER1-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5164; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register form.
5165 %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
; Memory form, fed by a 32-byte aligned load of %a2.
5166 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5167 %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2)
5168 ret <32 x i8> %3
5169}
5170declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
5171
; Schedule test for vpshufd. One shufflevector permutes the register argument
; %a0 and a second permutes a value loaded from %a1 (shown as the mem[...]
; source in the check lines). The two results are added, which appears as the
; trailing vpaddd.
; The znver1 checks list the memory-source shuffle before the register-source
; one; the other prefixes list the register-source shuffle first.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
5172define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) {
5173; GENERIC-LABEL: test_pshufd:
5174; GENERIC: # BB#0:
5175; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5176; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [5:1.00]
Craig Topperd4341922017-09-18 03:29:47 +00005177; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005178; GENERIC-NEXT: retq # sched: [1:1.00]
5179;
5180; HASWELL-LABEL: test_pshufd:
5181; HASWELL: # BB#0:
5182; HASWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5183; HASWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00]
Craig Topperd4341922017-09-18 03:29:47 +00005184; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005185; HASWELL-NEXT: retq # sched: [2:1.00]
5186;
Gadi Haber85d99b42017-10-17 13:45:39 +00005187; BROADWELL-LABEL: test_pshufd:
5188; BROADWELL: # BB#0:
5189; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005190; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005191; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005192; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005193;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005194; SKYLAKE-LABEL: test_pshufd:
5195; SKYLAKE: # BB#0:
5196; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005197; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5198; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5199; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005200;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005201; SKX-LABEL: test_pshufd:
5202; SKX: # BB#0:
5203; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005204; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5205; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5206; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005207;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005208; ZNVER1-LABEL: test_pshufd:
5209; ZNVER1: # BB#0:
5210; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50]
5211; ZNVER1-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25]
Craig Topperd4341922017-09-18 03:29:47 +00005212; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005213; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source shuffle, reversing the four elements within each half.
5214 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; Memory-source shuffle, swapping adjacent element pairs of the loaded value.
5215 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
5216 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; Combine both shuffles so neither one is dead.
Craig Topperd4341922017-09-18 03:29:47 +00005217 %4 = add <8 x i32> %1, %3
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005218 ret <8 x i32> %4
5219}
5220
; Schedule test for vpshufhw. Both shufflevector masks leave elements 0-3 and
; 8-11 in place and permute only elements 4-7 and 12-15. One shuffle reads the
; register argument %a0, the other reads a value loaded from %a1 (the mem[...]
; source in the check lines). The results are or-ed, giving the trailing vpor.
; The znver1 checks list the memory-source shuffle before the register-source
; one; the other prefixes list the register-source shuffle first.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
5221define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) {
5222; GENERIC-LABEL: test_pshufhw:
5223; GENERIC: # BB#0:
5224; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5225; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [5:1.00]
5226; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5227; GENERIC-NEXT: retq # sched: [1:1.00]
5228;
5229; HASWELL-LABEL: test_pshufhw:
5230; HASWELL: # BB#0:
5231; HASWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5232; HASWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00]
5233; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5234; HASWELL-NEXT: retq # sched: [2:1.00]
5235;
Gadi Haber85d99b42017-10-17 13:45:39 +00005236; BROADWELL-LABEL: test_pshufhw:
5237; BROADWELL: # BB#0:
5238; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005239; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005240; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00005241; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005242;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005243; SKYLAKE-LABEL: test_pshufhw:
5244; SKYLAKE: # BB#0:
5245; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005246; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5247; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5248; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005249;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005250; SKX-LABEL: test_pshufhw:
5251; SKX: # BB#0:
5252; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005253; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5254; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5255; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005256;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005257; ZNVER1-LABEL: test_pshufhw:
5258; ZNVER1: # BB#0:
5259; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50]
5260; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25]
5261; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5262; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source shuffle, reversing elements 4-7 and 12-15.
5263 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
; Memory-source shuffle, swapping adjacent pairs within elements 4-7 and 12-15.
5264 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
5265 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
; Combine both shuffles so neither one is dead.
5266 %4 = or <16 x i16> %1, %3
5267 ret <16 x i16> %4
5268}
5269
; Schedule test for vpshuflw. Both shufflevector masks permute only elements
; 0-3 and 8-11 and leave elements 4-7 and 12-15 in place. One shuffle reads the
; register argument %a0, the other reads a value loaded from %a1 (the mem[...]
; source in the check lines). The results are or-ed, giving the trailing vpor.
; The znver1 checks list the memory-source shuffle before the register-source
; one; the other prefixes list the register-source shuffle first.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
5270define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) {
5271; GENERIC-LABEL: test_pshuflw:
5272; GENERIC: # BB#0:
5273; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5274; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [5:1.00]
5275; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5276; GENERIC-NEXT: retq # sched: [1:1.00]
5277;
5278; HASWELL-LABEL: test_pshuflw:
5279; HASWELL: # BB#0:
5280; HASWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5281; HASWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00]
5282; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5283; HASWELL-NEXT: retq # sched: [2:1.00]
5284;
Gadi Haber85d99b42017-10-17 13:45:39 +00005285; BROADWELL-LABEL: test_pshuflw:
5286; BROADWELL: # BB#0:
5287; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005288; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005289; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00005290; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005291;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005292; SKYLAKE-LABEL: test_pshuflw:
5293; SKYLAKE: # BB#0:
5294; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005295; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5296; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5297; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005298;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005299; SKX-LABEL: test_pshuflw:
5300; SKX: # BB#0:
5301; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005302; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5303; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5304; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005305;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005306; ZNVER1-LABEL: test_pshuflw:
5307; ZNVER1: # BB#0:
5308; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50]
5309; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25]
5310; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5311; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source shuffle, reversing elements 0-3 and 8-11.
5312 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
; Memory-source shuffle, swapping adjacent pairs within elements 0-3 and 8-11.
5313 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
5314 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
; Combine both shuffles so neither one is dead.
5315 %4 = or <16 x i16> %1, %3
5316 ret <16 x i16> %4
5317}
5318
; Schedule test for vpsignb. The first intrinsic call uses two register
; operands; the second takes its right-hand operand from a load of %a2, which
; every check block expects as the (%rdi) memory operand.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005319define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5320; GENERIC-LABEL: test_psignb:
5321; GENERIC: # BB#0:
5322; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5323; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5324; GENERIC-NEXT: retq # sched: [1:1.00]
5325;
5326; HASWELL-LABEL: test_psignb:
5327; HASWELL: # BB#0:
5328; HASWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5329; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5330; HASWELL-NEXT: retq # sched: [2:1.00]
5331;
Gadi Haber85d99b42017-10-17 13:45:39 +00005332; BROADWELL-LABEL: test_psignb:
5333; BROADWELL: # BB#0:
5334; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005335; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5336; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005337;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005338; SKYLAKE-LABEL: test_psignb:
5339; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005340; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5341; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5342; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005343;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005344; SKX-LABEL: test_psignb:
5345; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005346; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5347; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5348; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005349;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005350; ZNVER1-LABEL: test_psignb:
5351; ZNVER1: # BB#0:
5352; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5353; ZNVER1-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5354; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register form.
5355 %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
; Memory form, fed by a 32-byte aligned load of %a2.
5356 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5357 %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2)
5358 ret <32 x i8> %3
5359}
5360declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
5361
; Schedule test for vpsignd. The first intrinsic call uses two register
; operands; the second takes its right-hand operand from a load of %a2, which
; every check block expects as the (%rdi) memory operand.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
5362define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5363; GENERIC-LABEL: test_psignd:
5364; GENERIC: # BB#0:
5365; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5366; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5367; GENERIC-NEXT: retq # sched: [1:1.00]
5368;
5369; HASWELL-LABEL: test_psignd:
5370; HASWELL: # BB#0:
5371; HASWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5372; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5373; HASWELL-NEXT: retq # sched: [2:1.00]
5374;
Gadi Haber85d99b42017-10-17 13:45:39 +00005375; BROADWELL-LABEL: test_psignd:
5376; BROADWELL: # BB#0:
5377; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005378; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5379; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005380;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005381; SKYLAKE-LABEL: test_psignd:
5382; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005383; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5384; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5385; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005386;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005387; SKX-LABEL: test_psignd:
5388; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005389; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5390; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5391; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005392;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005393; ZNVER1-LABEL: test_psignd:
5394; ZNVER1: # BB#0:
5395; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5396; ZNVER1-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5397; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register form.
5398 %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
; Memory form, fed by a 32-byte aligned load of %a2.
5399 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5400 %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2)
5401 ret <8 x i32> %3
5402}
5403declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
5404
; Schedule test for vpsignw. The first intrinsic call uses two register
; operands; the second takes its right-hand operand from a load of %a2, which
; every check block expects as the (%rdi) memory operand.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
5405define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
5406; GENERIC-LABEL: test_psignw:
5407; GENERIC: # BB#0:
5408; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5409; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5410; GENERIC-NEXT: retq # sched: [1:1.00]
5411;
5412; HASWELL-LABEL: test_psignw:
5413; HASWELL: # BB#0:
5414; HASWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5415; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5416; HASWELL-NEXT: retq # sched: [2:1.00]
5417;
Gadi Haber85d99b42017-10-17 13:45:39 +00005418; BROADWELL-LABEL: test_psignw:
5419; BROADWELL: # BB#0:
5420; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005421; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5422; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005423;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005424; SKYLAKE-LABEL: test_psignw:
5425; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005426; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5427; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5428; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005429;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005430; SKX-LABEL: test_psignw:
5431; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005432; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5433; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5434; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005435;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005436; ZNVER1-LABEL: test_psignw:
5437; ZNVER1: # BB#0:
5438; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5439; ZNVER1-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5440; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register form.
5441 %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
; Memory form, fed by a 32-byte aligned load of %a2.
5442 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
5443 %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2)
5444 ret <16 x i16> %3
5445}
5446declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
5447
; Schedule test for vpslld covering three operand forms. The first intrinsic
; call takes the shift count in a register (%xmm1 in the check lines). The
; second takes it from a 16-byte aligned load of %a2 (the (%rdi) operand). The
; final shl by a splat of 2 is expected as the immediate form (the $2 operand).
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
5448define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5449; GENERIC-LABEL: test_pslld:
5450; GENERIC: # BB#0:
5451; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5452; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5453; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5454; GENERIC-NEXT: retq # sched: [1:1.00]
5455;
5456; HASWELL-LABEL: test_pslld:
5457; HASWELL: # BB#0:
5458; HASWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5459; HASWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5460; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5461; HASWELL-NEXT: retq # sched: [2:1.00]
5462;
Gadi Haber85d99b42017-10-17 13:45:39 +00005463; BROADWELL-LABEL: test_pslld:
5464; BROADWELL: # BB#0:
5465; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005466; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005467; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005468; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005469;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005470; SKYLAKE-LABEL: test_pslld:
5471; SKYLAKE: # BB#0:
5472; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005473; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5474; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5475; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005476;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005477; SKX-LABEL: test_pslld:
5478; SKX: # BB#0:
5479; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005480; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5481; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5482; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005483;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005484; ZNVER1-LABEL: test_pslld:
5485; ZNVER1: # BB#0:
5486; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5487; ZNVER1-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5488; ZNVER1-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.25]
5489; ZNVER1-NEXT: retq # sched: [1:0.50]
; Shift count in a register.
5490 %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
; Shift count loaded from memory. The count operand is a 128-bit vector, hence align 16.
5491 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5492 %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2)
; Constant shift by 2 in every element, matched by the immediate form above.
5493 %4 = shl <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5494 ret <8 x i32> %4
5495}
5496declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
5497
; Schedule test for vpslldq. The byte shift is written as a shufflevector whose
; first operand is zeroinitializer and whose second operand is %a0. Mask
; indices 0-31 select from the zero vector and 32-63 select from %a0. For each
; 16-byte half the mask picks three zero bytes followed by the first thirteen
; bytes of the matching half of %a0, which is the pattern the check lines spell
; out (zero,zero,zero,ymm0[0..12] and zero,zero,zero,ymm0[16..28]).
; Only a register form is tested here; the function has no pointer argument.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005498define <32 x i8> @test_pslldq(<32 x i8> %a0) {
5499; GENERIC-LABEL: test_pslldq:
5500; GENERIC: # BB#0:
5501; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5502; GENERIC-NEXT: retq # sched: [1:1.00]
5503;
5504; HASWELL-LABEL: test_pslldq:
5505; HASWELL: # BB#0:
5506; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5507; HASWELL-NEXT: retq # sched: [2:1.00]
5508;
Gadi Haber85d99b42017-10-17 13:45:39 +00005509; BROADWELL-LABEL: test_pslldq:
5510; BROADWELL: # BB#0:
5511; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005512; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005513;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005514; SKYLAKE-LABEL: test_pslldq:
5515; SKYLAKE: # BB#0:
5516; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005517; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005518;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005519; SKX-LABEL: test_pslldq:
5520; SKX: # BB#0:
5521; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005522; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005523;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005524; ZNVER1-LABEL: test_pslldq:
5525; ZNVER1: # BB#0:
5526; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00]
5527; ZNVER1-NEXT: retq # sched: [1:0.50]
; Indices 13-15 and 29-31 pick zero bytes; 32-44 and 48-60 pick bytes 0-12 and 16-28 of %a0.
5528 %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
5529 ret <32 x i8> %1
5530}
5531
; Schedule test for vpsllq covering three operand forms. The first intrinsic
; call takes the shift count in a register (%xmm1 in the check lines). The
; second takes it from a 16-byte aligned load of %a2 (the (%rdi) operand). The
; final shl by a splat of 2 is expected as the immediate form (the $2 operand).
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE on the first line of this file); regenerate rather than hand-edit.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005532define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5533; GENERIC-LABEL: test_psllq:
5534; GENERIC: # BB#0:
5535; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5536; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5537; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5538; GENERIC-NEXT: retq # sched: [1:1.00]
5539;
5540; HASWELL-LABEL: test_psllq:
5541; HASWELL: # BB#0:
5542; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5543; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5544; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5545; HASWELL-NEXT: retq # sched: [2:1.00]
5546;
Gadi Haber85d99b42017-10-17 13:45:39 +00005547; BROADWELL-LABEL: test_psllq:
5548; BROADWELL: # BB#0:
5549; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005550; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005551; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005552; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005553;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005554; SKYLAKE-LABEL: test_psllq:
5555; SKYLAKE: # BB#0:
5556; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005557; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5558; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5559; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005560;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005561; SKX-LABEL: test_psllq:
5562; SKX: # BB#0:
5563; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005564; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5565; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5566; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005567;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005568; ZNVER1-LABEL: test_psllq:
5569; ZNVER1: # BB#0:
5570; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5571; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5572; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25]
5573; ZNVER1-NEXT: retq # sched: [1:0.50]
; Shift count in a register.
5574 %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
; Shift count loaded from memory. The count operand is a 128-bit vector, hence align 16.
5575 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5576 %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2)
; Constant shift by 2 in every element, matched by the immediate form above.
5577 %4 = shl <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
5578 ret <4 x i64> %4
5579}
5580declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
5581
; Scheduling test for vpsllvd on xmm. The IR calls @llvm.x86.avx2.psllv.d
; twice: first with both operands passed by value, then with the second
; operand loaded through %a2, so the expected output contains a register form
; and a memory form. The assertion lines pin the emitted instructions and
; their "sched: [..]" annotations per -mcpu run.
5582define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5583; GENERIC-LABEL: test_psllvd:
5584; GENERIC: # BB#0:
5585; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5586; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5587; GENERIC-NEXT: retq # sched: [1:1.00]
5588;
5589; HASWELL-LABEL: test_psllvd:
5590; HASWELL: # BB#0:
5591; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5592; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
5593; HASWELL-NEXT: retq # sched: [2:1.00]
5594;
Gadi Haber85d99b42017-10-17 13:45:39 +00005595; BROADWELL-LABEL: test_psllvd:
5596; BROADWELL: # BB#0:
5597; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005598; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5599; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005600;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005601; SKYLAKE-LABEL: test_psllvd:
5602; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005603; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5604; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5605; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005606;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005607; SKX-LABEL: test_psllvd:
5608; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005609; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5610; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5611; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005612;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005613; ZNVER1-LABEL: test_psllvd:
5614; ZNVER1: # BB#0:
5615; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5616; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5617; ZNVER1-NEXT: retq # sched: [1:0.50]
5618 %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
5619 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5620 %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2)
5621 ret <4 x i32> %3
5622}
5623declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
5624
; Scheduling test for vpsllvd on ymm. The IR calls
; @llvm.x86.avx2.psllv.d.256 twice: first with both operands passed by value,
; then with the second operand loaded through %a2 (align 32), so the expected
; output contains a register form and a memory form. The assertion lines pin
; the emitted instructions and their "sched: [..]" annotations per -mcpu run.
5625define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5626; GENERIC-LABEL: test_psllvd_ymm:
5627; GENERIC: # BB#0:
5628; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5629; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5630; GENERIC-NEXT: retq # sched: [1:1.00]
5631;
5632; HASWELL-LABEL: test_psllvd_ymm:
5633; HASWELL: # BB#0:
5634; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5635; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
5636; HASWELL-NEXT: retq # sched: [2:1.00]
5637;
Gadi Haber85d99b42017-10-17 13:45:39 +00005638; BROADWELL-LABEL: test_psllvd_ymm:
5639; BROADWELL: # BB#0:
5640; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005641; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5642; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005643;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005644; SKYLAKE-LABEL: test_psllvd_ymm:
5645; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005646; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5647; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5648; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005649;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005650; SKX-LABEL: test_psllvd_ymm:
5651; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005652; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5653; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5654; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005655;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005656; ZNVER1-LABEL: test_psllvd_ymm:
5657; ZNVER1: # BB#0:
5658; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5659; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5660; ZNVER1-NEXT: retq # sched: [1:0.50]
5661 %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
5662 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5663 %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2)
5664 ret <8 x i32> %3
5665}
5666declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
5667
; Scheduling test for vpsllvq on xmm. The IR calls @llvm.x86.avx2.psllv.q
; twice: first with both operands passed by value, then with the second
; operand loaded through %a2, so the expected output contains a register form
; and a memory form. The assertion lines pin the emitted instructions and
; their "sched: [..]" annotations per -mcpu run.
5668define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5669; GENERIC-LABEL: test_psllvq:
5670; GENERIC: # BB#0:
5671; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5672; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5673; GENERIC-NEXT: retq # sched: [1:1.00]
5674;
5675; HASWELL-LABEL: test_psllvq:
5676; HASWELL: # BB#0:
5677; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5678; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
5679; HASWELL-NEXT: retq # sched: [2:1.00]
5680;
Gadi Haber85d99b42017-10-17 13:45:39 +00005681; BROADWELL-LABEL: test_psllvq:
5682; BROADWELL: # BB#0:
5683; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005684; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
5685; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005686;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005687; SKYLAKE-LABEL: test_psllvq:
5688; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005689; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5690; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5691; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005692;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005693; SKX-LABEL: test_psllvq:
5694; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005695; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5696; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5697; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005698;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005699; ZNVER1-LABEL: test_psllvq:
5700; ZNVER1: # BB#0:
5701; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5702; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5703; ZNVER1-NEXT: retq # sched: [1:0.50]
5704 %1 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
5705 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5706 %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2)
5707 ret <2 x i64> %3
5708}
5709declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
5710
; Scheduling test for vpsllvq on ymm. The IR calls
; @llvm.x86.avx2.psllv.q.256 twice: first with both operands passed by value,
; then with the second operand loaded through %a2 (align 32), so the expected
; output contains a register form and a memory form. The assertion lines pin
; the emitted instructions and their "sched: [..]" annotations per -mcpu run.
5711define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5712; GENERIC-LABEL: test_psllvq_ymm:
5713; GENERIC: # BB#0:
5714; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5715; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5716; GENERIC-NEXT: retq # sched: [1:1.00]
5717;
5718; HASWELL-LABEL: test_psllvq_ymm:
5719; HASWELL: # BB#0:
5720; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5721; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5722; HASWELL-NEXT: retq # sched: [2:1.00]
5723;
Gadi Haber85d99b42017-10-17 13:45:39 +00005724; BROADWELL-LABEL: test_psllvq_ymm:
5725; BROADWELL: # BB#0:
5726; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005727; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5728; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005729;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005730; SKYLAKE-LABEL: test_psllvq_ymm:
5731; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005732; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5733; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5734; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005735;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005736; SKX-LABEL: test_psllvq_ymm:
5737; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005738; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5739; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5740; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005741;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005742; ZNVER1-LABEL: test_psllvq_ymm:
5743; ZNVER1: # BB#0:
5744; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5745; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5746; ZNVER1-NEXT: retq # sched: [1:0.50]
5747 %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
5748 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5749 %3 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2)
5750 ret <4 x i64> %3
5751}
5752declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
5753
; Scheduling test for vpsllw on ymm. The IR calls @llvm.x86.avx2.psll.w twice
; (shift count passed in %a1, then a <8 x i16> count loaded through %a2) and
; finishes with a constant shl by 2, so the expected output contains a
; register form, a memory form and an immediate form. The assertion lines pin
; the emitted instructions and their "sched: [..]" annotations per -mcpu run.
5754define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5755; GENERIC-LABEL: test_psllw:
5756; GENERIC: # BB#0:
5757; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5758; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5759; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5760; GENERIC-NEXT: retq # sched: [1:1.00]
5761;
5762; HASWELL-LABEL: test_psllw:
5763; HASWELL: # BB#0:
5764; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5765; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5766; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5767; HASWELL-NEXT: retq # sched: [2:1.00]
5768;
Gadi Haber85d99b42017-10-17 13:45:39 +00005769; BROADWELL-LABEL: test_psllw:
5770; BROADWELL: # BB#0:
5771; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005772; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005773; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005774; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005775;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005776; SKYLAKE-LABEL: test_psllw:
5777; SKYLAKE: # BB#0:
5778; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005779; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5780; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5781; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005782;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005783; SKX-LABEL: test_psllw:
5784; SKX: # BB#0:
5785; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005786; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5787; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5788; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005789;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005790; ZNVER1-LABEL: test_psllw:
5791; ZNVER1: # BB#0:
5792; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5793; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5794; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25]
5795; ZNVER1-NEXT: retq # sched: [1:0.50]
5796 %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
5797 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5798 %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2)
5799 %4 = shl <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
5800 ret <16 x i16> %4
5801}
5802declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
5803
; Scheduling test for vpsrad on ymm. The IR calls @llvm.x86.avx2.psra.d twice
; (shift count passed in %a1, then a <4 x i32> count loaded through %a2) and
; finishes with a constant ashr by 2, so the expected output contains a
; register form, a memory form and an immediate form. The assertion lines pin
; the emitted instructions and their "sched: [..]" annotations per -mcpu run.
5804define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5805; GENERIC-LABEL: test_psrad:
5806; GENERIC: # BB#0:
5807; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5808; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5809; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5810; GENERIC-NEXT: retq # sched: [1:1.00]
5811;
5812; HASWELL-LABEL: test_psrad:
5813; HASWELL: # BB#0:
5814; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5815; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5816; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5817; HASWELL-NEXT: retq # sched: [2:1.00]
5818;
Gadi Haber85d99b42017-10-17 13:45:39 +00005819; BROADWELL-LABEL: test_psrad:
5820; BROADWELL: # BB#0:
5821; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005822; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005823; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005824; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005825;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005826; SKYLAKE-LABEL: test_psrad:
5827; SKYLAKE: # BB#0:
5828; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005829; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5830; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5831; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005832;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005833; SKX-LABEL: test_psrad:
5834; SKX: # BB#0:
5835; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005836; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5837; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5838; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005839;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005840; ZNVER1-LABEL: test_psrad:
5841; ZNVER1: # BB#0:
5842; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5843; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5844; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25]
5845; ZNVER1-NEXT: retq # sched: [1:0.50]
5846 %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
5847 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5848 %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2)
5849 %4 = ashr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5850 ret <8 x i32> %4
5851}
5852declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
5853
; Scheduling test for vpsravd on xmm. The IR calls @llvm.x86.avx2.psrav.d
; twice: first with both operands passed by value, then with the second
; operand loaded through %a2, so the expected output contains a register form
; and a memory form. The assertion lines pin the emitted instructions and
; their "sched: [..]" annotations per -mcpu run.
5854define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5855; GENERIC-LABEL: test_psravd:
5856; GENERIC: # BB#0:
5857; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5858; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5859; GENERIC-NEXT: retq # sched: [1:1.00]
5860;
5861; HASWELL-LABEL: test_psravd:
5862; HASWELL: # BB#0:
5863; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5864; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
5865; HASWELL-NEXT: retq # sched: [2:1.00]
5866;
Gadi Haber85d99b42017-10-17 13:45:39 +00005867; BROADWELL-LABEL: test_psravd:
5868; BROADWELL: # BB#0:
5869; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005870; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5871; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005872;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005873; SKYLAKE-LABEL: test_psravd:
5874; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005875; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5876; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5877; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005878;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005879; SKX-LABEL: test_psravd:
5880; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005881; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5882; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5883; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005884;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005885; ZNVER1-LABEL: test_psravd:
5886; ZNVER1: # BB#0:
5887; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5888; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5889; ZNVER1-NEXT: retq # sched: [1:0.50]
5890 %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
5891 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5892 %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2)
5893 ret <4 x i32> %3
5894}
5895declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
5896
; Scheduling test for vpsravd on ymm. The IR calls
; @llvm.x86.avx2.psrav.d.256 twice: first with both operands passed by value,
; then with the second operand loaded through %a2 (align 32), so the expected
; output contains a register form and a memory form. The assertion lines pin
; the emitted instructions and their "sched: [..]" annotations per -mcpu run.
5897define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5898; GENERIC-LABEL: test_psravd_ymm:
5899; GENERIC: # BB#0:
5900; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5901; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5902; GENERIC-NEXT: retq # sched: [1:1.00]
5903;
5904; HASWELL-LABEL: test_psravd_ymm:
5905; HASWELL: # BB#0:
5906; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5907; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
5908; HASWELL-NEXT: retq # sched: [2:1.00]
5909;
Gadi Haber85d99b42017-10-17 13:45:39 +00005910; BROADWELL-LABEL: test_psravd_ymm:
5911; BROADWELL: # BB#0:
5912; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005913; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5914; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005915;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005916; SKYLAKE-LABEL: test_psravd_ymm:
5917; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005918; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5919; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5920; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005921;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005922; SKX-LABEL: test_psravd_ymm:
5923; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00005924; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5925; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5926; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005927;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005928; ZNVER1-LABEL: test_psravd_ymm:
5929; ZNVER1: # BB#0:
5930; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5931; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5932; ZNVER1-NEXT: retq # sched: [1:0.50]
5933 %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
5934 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5935 %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2)
5936 ret <8 x i32> %3
5937}
5938declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
5939
; Scheduling test for vpsraw on ymm. The IR calls @llvm.x86.avx2.psra.w twice
; (shift count passed in %a1, then a <8 x i16> count loaded through %a2) and
; finishes with a constant ashr by 2, so the expected output contains a
; register form, a memory form and an immediate form. The assertion lines pin
; the emitted instructions and their "sched: [..]" annotations per -mcpu run.
5940define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5941; GENERIC-LABEL: test_psraw:
5942; GENERIC: # BB#0:
5943; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5944; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5945; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5946; GENERIC-NEXT: retq # sched: [1:1.00]
5947;
5948; HASWELL-LABEL: test_psraw:
5949; HASWELL: # BB#0:
5950; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5951; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5952; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5953; HASWELL-NEXT: retq # sched: [2:1.00]
5954;
Gadi Haber85d99b42017-10-17 13:45:39 +00005955; BROADWELL-LABEL: test_psraw:
5956; BROADWELL: # BB#0:
5957; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005958; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005959; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005960; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005961;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005962; SKYLAKE-LABEL: test_psraw:
5963; SKYLAKE: # BB#0:
5964; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005965; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5966; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5967; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005968;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005969; SKX-LABEL: test_psraw:
5970; SKX: # BB#0:
5971; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005972; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5973; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5974; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005975;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005976; ZNVER1-LABEL: test_psraw:
5977; ZNVER1: # BB#0:
5978; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5979; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5980; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25]
5981; ZNVER1-NEXT: retq # sched: [1:0.50]
5982 %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
5983 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5984 %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2)
5985 %4 = ashr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
5986 ret <16 x i16> %4
5987}
5988declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
5989
; Scheduling test for vpsrld on ymm. The IR calls @llvm.x86.avx2.psrl.d twice
; (shift count passed in %a1, then a <4 x i32> count loaded through %a2) and
; finishes with a constant lshr by 2, so the expected output contains a
; register form, a memory form and an immediate form. The assertion lines pin
; the emitted instructions and their "sched: [..]" annotations per -mcpu run.
5990define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5991; GENERIC-LABEL: test_psrld:
5992; GENERIC: # BB#0:
5993; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5994; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5995; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
5996; GENERIC-NEXT: retq # sched: [1:1.00]
5997;
5998; HASWELL-LABEL: test_psrld:
5999; HASWELL: # BB#0:
6000; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6001; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
6002; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
6003; HASWELL-NEXT: retq # sched: [2:1.00]
6004;
Gadi Haber85d99b42017-10-17 13:45:39 +00006005; BROADWELL-LABEL: test_psrld:
6006; BROADWELL: # BB#0:
6007; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006008; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006009; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006010; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006011;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006012; SKYLAKE-LABEL: test_psrld:
6013; SKYLAKE: # BB#0:
6014; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006015; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6016; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6017; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006018;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006019; SKX-LABEL: test_psrld:
6020; SKX: # BB#0:
6021; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006022; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6023; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6024; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006025;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006026; ZNVER1-LABEL: test_psrld:
6027; ZNVER1: # BB#0:
6028; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6029; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6030; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25]
6031; ZNVER1-NEXT: retq # sched: [1:0.50]
6032 %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
6033 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6034 %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2)
6035 %4 = lshr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
6036 ret <8 x i32> %4
6037}
6038declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
6039
; Scheduling test for vpsrldq on ymm. The shufflevector takes bytes 3..15 and
; 19..31 of %a0 and fills the last three positions of each 16-byte half from
; the zeroinitializer operand (mask indices 32..34 and 48..50). The assertion
; lines pin the decoded vpsrldq shuffle comment and its "sched: [..]"
; annotation per -mcpu run; no memory form is exercised here.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006040define <32 x i8> @test_psrldq(<32 x i8> %a0) {
6041; GENERIC-LABEL: test_psrldq:
6042; GENERIC: # BB#0:
6043; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6044; GENERIC-NEXT: retq # sched: [1:1.00]
6045;
6046; HASWELL-LABEL: test_psrldq:
6047; HASWELL: # BB#0:
6048; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6049; HASWELL-NEXT: retq # sched: [2:1.00]
6050;
Gadi Haber85d99b42017-10-17 13:45:39 +00006051; BROADWELL-LABEL: test_psrldq:
6052; BROADWELL: # BB#0:
6053; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006054; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006055;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006056; SKYLAKE-LABEL: test_psrldq:
6057; SKYLAKE: # BB#0:
6058; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006059; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006060;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006061; SKX-LABEL: test_psrldq:
6062; SKX: # BB#0:
6063; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006064; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006065;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006066; ZNVER1-LABEL: test_psrldq:
6067; ZNVER1: # BB#0:
6068; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00]
6069; ZNVER1-NEXT: retq # sched: [1:0.50]
6070 %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
6071 ret <32 x i8> %1
6072}
6073
; Scheduling test for vpsrlq on ymm. The IR calls @llvm.x86.avx2.psrl.q twice
; (shift count passed in %a1, then a <2 x i64> count loaded through %a2) and
; finishes with a constant lshr by 2, so the expected output contains a
; register form, a memory form and an immediate form. The assertion lines pin
; the emitted instructions and their "sched: [..]" annotations per -mcpu run.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006074define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6075; GENERIC-LABEL: test_psrlq:
6076; GENERIC: # BB#0:
6077; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
6078; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6079; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6080; GENERIC-NEXT: retq # sched: [1:1.00]
6081;
6082; HASWELL-LABEL: test_psrlq:
6083; HASWELL: # BB#0:
6084; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6085; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
6086; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6087; HASWELL-NEXT: retq # sched: [2:1.00]
6088;
Gadi Haber85d99b42017-10-17 13:45:39 +00006089; BROADWELL-LABEL: test_psrlq:
6090; BROADWELL: # BB#0:
6091; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006092; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006093; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006094; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006095;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006096; SKYLAKE-LABEL: test_psrlq:
6097; SKYLAKE: # BB#0:
6098; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006099; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6100; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6101; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006102;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006103; SKX-LABEL: test_psrlq:
6104; SKX: # BB#0:
6105; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006106; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6107; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6108; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006109;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006110; ZNVER1-LABEL: test_psrlq:
6111; ZNVER1: # BB#0:
6112; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6113; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6114; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25]
6115; ZNVER1-NEXT: retq # sched: [1:0.50]
6116 %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
6117 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6118 %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2)
6119 %4 = lshr <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
6120 ret <4 x i64> %4
6121}
6122declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
6123
; test_psrlvd: applies @llvm.x86.avx2.psrlv.d twice on <4 x i32> values -- first
; to the two register arguments (%a0, %a1), then to that result and a vector
; loaded from %a2 -- so both the register form and the (%rdi) memory-operand form
; of vpsrlvd appear in the expected assembly. The per-CPU assertion lines are
; autogenerated (see the NOTE on the file's first line); the bracketed "sched"
; pairs are presumably latency and reciprocal throughput as printed by
; -print-schedule -- TODO confirm.
6124define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
6125; GENERIC-LABEL: test_psrlvd:
6126; GENERIC: # BB#0:
6127; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6128; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
6129; GENERIC-NEXT: retq # sched: [1:1.00]
6130;
6131; HASWELL-LABEL: test_psrlvd:
6132; HASWELL: # BB#0:
6133; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
6134; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
6135; HASWELL-NEXT: retq # sched: [2:1.00]
6136;
Gadi Haber85d99b42017-10-17 13:45:39 +00006137; BROADWELL-LABEL: test_psrlvd:
6138; BROADWELL: # BB#0:
6139; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006140; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
6141; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006142;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006143; SKYLAKE-LABEL: test_psrlvd:
6144; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006145; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6146; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6147; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006148;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006149; SKX-LABEL: test_psrlvd:
6150; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006151; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6152; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6153; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006154;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006155; ZNVER1-LABEL: test_psrlvd:
6156; ZNVER1: # BB#0:
6157; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6158; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6159; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-operand use of the intrinsic.
6160 %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
  ; Second operand comes from memory; the expected assembly above shows it as a
  ; (%rdi) operand on the second vpsrlvd rather than as a separate load.
6161 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6162 %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2)
6163 ret <4 x i32> %3
6164}
6165declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
6166
; test_psrlvd_ymm: 256-bit counterpart of the vpsrlvd test. It applies
; @llvm.x86.avx2.psrlv.d.256 to (%a0, %a1) and then to that result and an
; <8 x i32> vector loaded from %a2, so the expected assembly contains vpsrlvd on
; ymm registers in both register and (%rdi) memory-operand form. The per-CPU
; assertion lines are autogenerated (see the NOTE on the file's first line).
6167define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6168; GENERIC-LABEL: test_psrlvd_ymm:
6169; GENERIC: # BB#0:
6170; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6171; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6172; GENERIC-NEXT: retq # sched: [1:1.00]
6173;
6174; HASWELL-LABEL: test_psrlvd_ymm:
6175; HASWELL: # BB#0:
6176; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
6177; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
6178; HASWELL-NEXT: retq # sched: [2:1.00]
6179;
Gadi Haber85d99b42017-10-17 13:45:39 +00006180; BROADWELL-LABEL: test_psrlvd_ymm:
6181; BROADWELL: # BB#0:
6182; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006183; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
6184; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006185;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006186; SKYLAKE-LABEL: test_psrlvd_ymm:
6187; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006188; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6189; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6190; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006191;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006192; SKX-LABEL: test_psrlvd_ymm:
6193; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006194; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6195; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6196; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006197;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006198; ZNVER1-LABEL: test_psrlvd_ymm:
6199; ZNVER1: # BB#0:
6200; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6201; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6202; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-operand use of the intrinsic.
6203 %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
  ; 32-byte-aligned load that supplies the second operand of the next call.
6204 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6205 %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2)
6206 ret <8 x i32> %3
6207}
6208declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
6209
; test_psrlvq: applies @llvm.x86.avx2.psrlv.q to (%a0, %a1) and then to that
; result and a <2 x i64> vector loaded from %a2. The expected assembly therefore
; contains vpsrlvq on xmm registers in register form and in (%rdi)
; memory-operand form. The per-CPU assertion lines are autogenerated (see the
; NOTE on the file's first line) and should be regenerated, not hand-edited.
6210define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6211; GENERIC-LABEL: test_psrlvq:
6212; GENERIC: # BB#0:
6213; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6214; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
6215; GENERIC-NEXT: retq # sched: [1:1.00]
6216;
6217; HASWELL-LABEL: test_psrlvq:
6218; HASWELL: # BB#0:
6219; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6220; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
6221; HASWELL-NEXT: retq # sched: [2:1.00]
6222;
Gadi Haber85d99b42017-10-17 13:45:39 +00006223; BROADWELL-LABEL: test_psrlvq:
6224; BROADWELL: # BB#0:
6225; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006226; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
6227; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006228;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006229; SKYLAKE-LABEL: test_psrlvq:
6230; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006231; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6232; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6233; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006234;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006235; SKX-LABEL: test_psrlvq:
6236; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006237; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6238; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6239; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006240;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006241; ZNVER1-LABEL: test_psrlvq:
6242; ZNVER1: # BB#0:
6243; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6244; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6245; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-operand use of the intrinsic.
6246 %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
  ; 16-byte-aligned load that supplies the second operand of the next call.
6247 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6248 %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2)
6249 ret <2 x i64> %3
6250}
6251declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
6252
; test_psrlvq_ymm: 256-bit counterpart of the vpsrlvq test. It applies
; @llvm.x86.avx2.psrlv.q.256 to (%a0, %a1) and then to that result and a
; <4 x i64> vector loaded from %a2; the expected assembly shows vpsrlvq on ymm
; registers in register form and in (%rdi) memory-operand form. The per-CPU
; assertion lines are autogenerated (see the NOTE on the file's first line).
6253define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6254; GENERIC-LABEL: test_psrlvq_ymm:
6255; GENERIC: # BB#0:
6256; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6257; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6258; GENERIC-NEXT: retq # sched: [1:1.00]
6259;
6260; HASWELL-LABEL: test_psrlvq_ymm:
6261; HASWELL: # BB#0:
6262; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6263; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
6264; HASWELL-NEXT: retq # sched: [2:1.00]
6265;
Gadi Haber85d99b42017-10-17 13:45:39 +00006266; BROADWELL-LABEL: test_psrlvq_ymm:
6267; BROADWELL: # BB#0:
6268; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006269; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6270; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006271;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006272; SKYLAKE-LABEL: test_psrlvq_ymm:
6273; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006274; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6275; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6276; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006277;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006278; SKX-LABEL: test_psrlvq_ymm:
6279; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006280; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6281; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6282; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006283;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006284; ZNVER1-LABEL: test_psrlvq_ymm:
6285; ZNVER1: # BB#0:
6286; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6287; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6288; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-operand use of the intrinsic.
6289 %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
  ; 32-byte-aligned load that supplies the second operand of the next call.
6290 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6291 %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2)
6292 ret <4 x i64> %3
6293}
6294declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
6295
; test_psrlw: exercises three operand forms of vpsrlw on a <16 x i16> value:
;   1. @llvm.x86.avx2.psrl.w with the <8 x i16> count in a register (%a1),
;   2. the same intrinsic with the count loaded from %a2 (shown as a (%rdi)
;      operand in the expected assembly),
;   3. an IR `lshr` by the constant 2 in every lane (shown as `vpsrlw $2`).
; The per-CPU assertion lines are autogenerated (see the NOTE on the file's
; first line); the bracketed "sched" pairs are presumably latency and reciprocal
; throughput as printed by -print-schedule -- TODO confirm.
6296define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
6297; GENERIC-LABEL: test_psrlw:
6298; GENERIC: # BB#0:
6299; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
6300; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6301; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6302; GENERIC-NEXT: retq # sched: [1:1.00]
6303;
6304; HASWELL-LABEL: test_psrlw:
6305; HASWELL: # BB#0:
6306; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6307; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
6308; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6309; HASWELL-NEXT: retq # sched: [2:1.00]
6310;
Gadi Haber85d99b42017-10-17 13:45:39 +00006311; BROADWELL-LABEL: test_psrlw:
6312; BROADWELL: # BB#0:
6313; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006314; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006315; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006316; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006317;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006318; SKYLAKE-LABEL: test_psrlw:
6319; SKYLAKE: # BB#0:
6320; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006321; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6322; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6323; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006324;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006325; SKX-LABEL: test_psrlw:
6326; SKX: # BB#0:
6327; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006328; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6329; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6330; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006331;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006332; ZNVER1-LABEL: test_psrlw:
6333; ZNVER1: # BB#0:
6334; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6335; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6336; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25]
6337; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Form 1: count operand passed in a register.
6338 %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
  ; Form 2: count operand loaded from memory.
6339 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
6340 %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2)
  ; Form 3: logical shift right by a constant splat of 2.
6341 %4 = lshr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
6342 ret <16 x i16> %4
6343}
6344declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
6345
; test_psubb: subtracts %a1 from %a0 and then subtracts a vector loaded from
; %a2, all on <32 x i8>, using the plain IR `sub` instruction (no intrinsic).
; The expected assembly contains vpsubb with a register operand and with a
; (%rdi) memory operand. The per-CPU assertion lines are autogenerated (see the
; NOTE on the file's first line); the bracketed "sched" pairs are presumably
; latency and reciprocal throughput as printed by -print-schedule -- TODO confirm.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006346define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006347; GENERIC-LABEL: test_psubb:
6348; GENERIC: # BB#0:
6349; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6350; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6351; GENERIC-NEXT: retq # sched: [1:1.00]
6352;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006353; HASWELL-LABEL: test_psubb:
6354; HASWELL: # BB#0:
6355; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00006356; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6357; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006358;
Gadi Haber85d99b42017-10-17 13:45:39 +00006359; BROADWELL-LABEL: test_psubb:
6360; BROADWELL: # BB#0:
6361; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006362; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6363; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006364;
Gadi Haber767d98b2017-08-30 08:08:50 +00006365; SKYLAKE-LABEL: test_psubb:
6366; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006367; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6368; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6369; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006370;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006371; SKX-LABEL: test_psubb:
6372; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006373; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6374; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6375; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006376;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006377; ZNVER1-LABEL: test_psubb:
6378; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00006379; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6380; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006381; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-register subtraction.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006382 %1 = sub <32 x i8> %a0, %a1
  ; Subtrahend for the second subtraction is loaded from memory.
6383 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6384 %3 = sub <32 x i8> %1, %2
6385 ret <32 x i8> %3
6386}
6387
; test_psubd: subtracts %a1 from %a0 and then subtracts a vector loaded from
; %a2, all on <8 x i32>, using the plain IR `sub` instruction (no intrinsic).
; The expected assembly contains vpsubd with a register operand and with a
; (%rdi) memory operand. The per-CPU assertion lines are autogenerated (see the
; NOTE on the file's first line) and should be regenerated, not hand-edited.
6388define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006389; GENERIC-LABEL: test_psubd:
6390; GENERIC: # BB#0:
6391; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6392; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6393; GENERIC-NEXT: retq # sched: [1:1.00]
6394;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006395; HASWELL-LABEL: test_psubd:
6396; HASWELL: # BB#0:
6397; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00006398; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6399; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006400;
Gadi Haber85d99b42017-10-17 13:45:39 +00006401; BROADWELL-LABEL: test_psubd:
6402; BROADWELL: # BB#0:
6403; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006404; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6405; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006406;
Gadi Haber767d98b2017-08-30 08:08:50 +00006407; SKYLAKE-LABEL: test_psubd:
6408; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006409; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6410; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6411; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006412;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006413; SKX-LABEL: test_psubd:
6414; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006415; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6416; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6417; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006418;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006419; ZNVER1-LABEL: test_psubd:
6420; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00006421; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6422; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006423; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-register subtraction.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006424 %1 = sub <8 x i32> %a0, %a1
  ; Subtrahend for the second subtraction is loaded from memory.
6425 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6426 %3 = sub <8 x i32> %1, %2
6427 ret <8 x i32> %3
6428}
6429
; test_psubq: subtracts %a1 from %a0 and then subtracts a vector loaded from
; %a2, all on <4 x i64>, using the plain IR `sub` instruction (no intrinsic).
; The expected assembly contains vpsubq with a register operand and with a
; (%rdi) memory operand. The per-CPU assertion lines are autogenerated (see the
; NOTE on the file's first line) and should be regenerated, not hand-edited.
6430define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006431; GENERIC-LABEL: test_psubq:
6432; GENERIC: # BB#0:
6433; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6434; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6435; GENERIC-NEXT: retq # sched: [1:1.00]
6436;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006437; HASWELL-LABEL: test_psubq:
6438; HASWELL: # BB#0:
6439; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00006440; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6441; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006442;
Gadi Haber85d99b42017-10-17 13:45:39 +00006443; BROADWELL-LABEL: test_psubq:
6444; BROADWELL: # BB#0:
6445; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006446; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6447; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006448;
Gadi Haber767d98b2017-08-30 08:08:50 +00006449; SKYLAKE-LABEL: test_psubq:
6450; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006451; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6452; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6453; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006454;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006455; SKX-LABEL: test_psubq:
6456; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006457; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6458; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6459; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006460;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006461; ZNVER1-LABEL: test_psubq:
6462; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00006463; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6464; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006465; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-register subtraction.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006466 %1 = sub <4 x i64> %a0, %a1
  ; Subtrahend for the second subtraction is loaded from memory.
6467 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6468 %3 = sub <4 x i64> %1, %2
6469 ret <4 x i64> %3
6470}
6471
; test_psubsb: applies @llvm.x86.avx2.psubs.b to (%a0, %a1) and then to that
; result and a <32 x i8> vector loaded from %a2, so the expected assembly shows
; vpsubsb with a register operand and with a (%rdi) memory operand. The per-CPU
; assertion lines are autogenerated (see the NOTE on the file's first line) and
; should be regenerated, not hand-edited.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006472define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6473; GENERIC-LABEL: test_psubsb:
6474; GENERIC: # BB#0:
6475; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6476; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6477; GENERIC-NEXT: retq # sched: [1:1.00]
6478;
6479; HASWELL-LABEL: test_psubsb:
6480; HASWELL: # BB#0:
6481; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6482; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6483; HASWELL-NEXT: retq # sched: [2:1.00]
6484;
Gadi Haber85d99b42017-10-17 13:45:39 +00006485; BROADWELL-LABEL: test_psubsb:
6486; BROADWELL: # BB#0:
6487; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006488; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6489; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006490;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006491; SKYLAKE-LABEL: test_psubsb:
6492; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006493; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6494; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6495; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006496;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006497; SKX-LABEL: test_psubsb:
6498; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006499; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6500; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6501; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006502;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006503; ZNVER1-LABEL: test_psubsb:
6504; ZNVER1: # BB#0:
6505; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6506; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6507; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-operand use of the intrinsic.
6508 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1)
  ; 32-byte-aligned load that supplies the second operand of the next call.
6509 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6510 %3 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %1, <32 x i8> %2)
6511 ret <32 x i8> %3
6512}
6513declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
6514
; test_psubsw: applies @llvm.x86.avx2.psubs.w to (%a0, %a1) and then to that
; result and a <16 x i16> vector loaded from %a2, so the expected assembly shows
; vpsubsw with a register operand and with a (%rdi) memory operand. The per-CPU
; assertion lines are autogenerated (see the NOTE on the file's first line) and
; should be regenerated, not hand-edited.
6515define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6516; GENERIC-LABEL: test_psubsw:
6517; GENERIC: # BB#0:
6518; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6519; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6520; GENERIC-NEXT: retq # sched: [1:1.00]
6521;
6522; HASWELL-LABEL: test_psubsw:
6523; HASWELL: # BB#0:
6524; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6525; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6526; HASWELL-NEXT: retq # sched: [2:1.00]
6527;
Gadi Haber85d99b42017-10-17 13:45:39 +00006528; BROADWELL-LABEL: test_psubsw:
6529; BROADWELL: # BB#0:
6530; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006531; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6532; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006533;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006534; SKYLAKE-LABEL: test_psubsw:
6535; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006536; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6537; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6538; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006539;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006540; SKX-LABEL: test_psubsw:
6541; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006542; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6543; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6544; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006545;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006546; ZNVER1-LABEL: test_psubsw:
6547; ZNVER1: # BB#0:
6548; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6549; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6550; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-operand use of the intrinsic.
6551 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1)
  ; 32-byte-aligned load that supplies the second operand of the next call.
6552 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6553 %3 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %1, <16 x i16> %2)
6554 ret <16 x i16> %3
6555}
6556declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
6557
; test_psubusb: applies @llvm.x86.avx2.psubus.b to (%a0, %a1) and then to that
; result and a <32 x i8> vector loaded from %a2, so the expected assembly shows
; vpsubusb with a register operand and with a (%rdi) memory operand. The per-CPU
; assertion lines are autogenerated (see the NOTE on the file's first line) and
; should be regenerated, not hand-edited.
6558define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6559; GENERIC-LABEL: test_psubusb:
6560; GENERIC: # BB#0:
6561; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6562; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6563; GENERIC-NEXT: retq # sched: [1:1.00]
6564;
6565; HASWELL-LABEL: test_psubusb:
6566; HASWELL: # BB#0:
6567; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6568; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6569; HASWELL-NEXT: retq # sched: [2:1.00]
6570;
Gadi Haber85d99b42017-10-17 13:45:39 +00006571; BROADWELL-LABEL: test_psubusb:
6572; BROADWELL: # BB#0:
6573; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006574; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6575; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006576;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006577; SKYLAKE-LABEL: test_psubusb:
6578; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006579; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6580; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6581; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006582;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006583; SKX-LABEL: test_psubusb:
6584; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006585; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6586; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6587; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006588;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006589; ZNVER1-LABEL: test_psubusb:
6590; ZNVER1: # BB#0:
6591; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6592; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6593; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-operand use of the intrinsic.
6594 %1 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1)
  ; 32-byte-aligned load that supplies the second operand of the next call.
6595 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6596 %3 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %1, <32 x i8> %2)
6597 ret <32 x i8> %3
6598}
6599declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
6600
; test_psubusw: applies @llvm.x86.avx2.psubus.w to (%a0, %a1) and then to that
; result and a <16 x i16> vector loaded from %a2, so the expected assembly shows
; vpsubusw with a register operand and with a (%rdi) memory operand. The per-CPU
; assertion lines are autogenerated (see the NOTE on the file's first line) and
; should be regenerated, not hand-edited.
6601define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6602; GENERIC-LABEL: test_psubusw:
6603; GENERIC: # BB#0:
6604; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6605; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6606; GENERIC-NEXT: retq # sched: [1:1.00]
6607;
6608; HASWELL-LABEL: test_psubusw:
6609; HASWELL: # BB#0:
6610; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6611; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6612; HASWELL-NEXT: retq # sched: [2:1.00]
6613;
Gadi Haber85d99b42017-10-17 13:45:39 +00006614; BROADWELL-LABEL: test_psubusw:
6615; BROADWELL: # BB#0:
6616; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006617; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6618; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006619;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006620; SKYLAKE-LABEL: test_psubusw:
6621; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006622; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6623; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6624; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006625;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006626; SKX-LABEL: test_psubusw:
6627; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006628; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6629; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6630; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006631;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006632; ZNVER1-LABEL: test_psubusw:
6633; ZNVER1: # BB#0:
6634; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6635; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6636; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-operand use of the intrinsic.
6637 %1 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1)
  ; 32-byte-aligned load that supplies the second operand of the next call.
6638 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6639 %3 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %1, <16 x i16> %2)
6640 ret <16 x i16> %3
6641}
6642declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
6643
; test_psubw: subtracts %a1 from %a0 and then subtracts a vector loaded from
; %a2, all on <16 x i16>, using the plain IR `sub` instruction (no intrinsic).
; The expected assembly contains vpsubw with a register operand and with a
; (%rdi) memory operand. The per-CPU assertion lines are autogenerated (see the
; NOTE on the file's first line) and should be regenerated, not hand-edited.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006644define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006645; GENERIC-LABEL: test_psubw:
6646; GENERIC: # BB#0:
6647; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6648; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6649; GENERIC-NEXT: retq # sched: [1:1.00]
6650;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006651; HASWELL-LABEL: test_psubw:
6652; HASWELL: # BB#0:
6653; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haberd76f7b82017-08-28 10:04:16 +00006654; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6655; HASWELL-NEXT: retq # sched: [2:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006656;
Gadi Haber85d99b42017-10-17 13:45:39 +00006657; BROADWELL-LABEL: test_psubw:
6658; BROADWELL: # BB#0:
6659; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006660; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6661; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006662;
Gadi Haber767d98b2017-08-30 08:08:50 +00006663; SKYLAKE-LABEL: test_psubw:
6664; SKYLAKE: # BB#0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006665; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6666; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6667; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006668;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006669; SKX-LABEL: test_psubw:
6670; SKX: # BB#0:
Gadi Haber684944b2017-10-08 12:52:54 +00006671; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6672; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6673; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006674;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006675; ZNVER1-LABEL: test_psubw:
6676; ZNVER1: # BB#0:
Craig Topper106b5b62017-07-19 02:45:14 +00006677; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6678; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006679; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-register subtraction.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006680 %1 = sub <16 x i16> %a0, %a1
  ; Subtrahend for the second subtraction is loaded from memory.
6681 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6682 %3 = sub <16 x i16> %1, %2
6683 ret <16 x i16> %3
6684}
6685
; test_punpckhbw: two chained <32 x i8> shuffles that interleave elements 8-15
; and 24-31 of their operands. The first shuffle takes both operands from
; registers (%a0, %a1); the second takes its second operand from the load of
; %a2. The per-CPU assertions below expect one register-form and one
; memory-form vpunpckhbw, each carrying the "sched" annotation that the
; -print-schedule option in the RUN lines adds.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006686define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6687; GENERIC-LABEL: test_punpckhbw:
6688; GENERIC: # BB#0:
6689; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6690; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [5:1.00]
6691; GENERIC-NEXT: retq # sched: [1:1.00]
6692;
6693; HASWELL-LABEL: test_punpckhbw:
6694; HASWELL: # BB#0:
6695; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6696; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00]
6697; HASWELL-NEXT: retq # sched: [2:1.00]
6698;
Gadi Haber85d99b42017-10-17 13:45:39 +00006699; BROADWELL-LABEL: test_punpckhbw:
6700; BROADWELL: # BB#0:
6701; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006702; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00]
6703; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006704;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006705; SKYLAKE-LABEL: test_punpckhbw:
6706; SKYLAKE: # BB#0:
6707; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006708; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6709; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006710;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006711; SKX-LABEL: test_punpckhbw:
6712; SKX: # BB#0:
6713; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006714; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6715; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006716;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006717; ZNVER1-LABEL: test_punpckhbw:
6718; ZNVER1: # BB#0:
6719; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25]
6720; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50]
6721; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register interleave of %a0 and %a1.
6722 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
; Same mask again, now with the loaded vector as the second operand.
6723 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6724 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
6725 ret <32 x i8> %3
6726}
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006727
; test_punpckhdq: two chained <8 x i32> shuffles that interleave elements 2-3
; and 6-7 of their operands (register operand first, then the vector loaded
; from %a2), followed by an add of a splat of 1. The per-CPU assertions below
; expect a register-form and a memory-form vpunpckhdq, then vpcmpeqd + vpsubd
; for the add, each with its "sched" annotation.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006728define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6729; GENERIC-LABEL: test_punpckhdq:
6730; GENERIC: # BB#0:
6731; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6732; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
6733; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
6734; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6735; GENERIC-NEXT: retq # sched: [1:1.00]
6736;
6737; HASWELL-LABEL: test_punpckhdq:
6738; HASWELL: # BB#0:
6739; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6740; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00]
6741; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6742; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6743; HASWELL-NEXT: retq # sched: [2:1.00]
6744;
Gadi Haber85d99b42017-10-17 13:45:39 +00006745; BROADWELL-LABEL: test_punpckhdq:
6746; BROADWELL: # BB#0:
6747; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006748; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006749; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6750; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006751; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006752;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006753; SKYLAKE-LABEL: test_punpckhdq:
6754; SKYLAKE: # BB#0:
6755; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006756; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6757; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6758; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6759; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006760;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006761; SKX-LABEL: test_punpckhdq:
6762; SKX: # BB#0:
6763; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006764; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6765; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6766; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6767; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006768;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006769; ZNVER1-LABEL: test_punpckhdq:
6770; ZNVER1: # BB#0:
6771; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25]
6772; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
6773; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6774; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6775; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register interleave of %a0 and %a1.
6776 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
; Same mask again, now with the loaded vector as the second operand.
6777 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6778 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
; NOTE(review): this add presumably keeps the shuffles in the integer domain so
; that vpunpckhdq (not a floating-point unpack) is selected - confirm.
6779 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
6780 ret <8 x i32> %4
6781}
6782
; test_punpckhqdq: two <4 x i64> shuffles that interleave elements 1 and 3 of
; %a0 with those of %a1 (register form) and with those of the vector loaded
; from %a2 (memory form); the two shuffle results are then added. The per-CPU
; assertions below expect two vpunpckhqdq instructions followed by vpaddq, each
; with its "sched" annotation.
6783define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6784; GENERIC-LABEL: test_punpckhqdq:
6785; GENERIC: # BB#0:
6786; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6787; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [5:1.00]
6788; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
6789; GENERIC-NEXT: retq # sched: [1:1.00]
6790;
6791; HASWELL-LABEL: test_punpckhqdq:
6792; HASWELL: # BB#0:
6793; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6794; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00]
6795; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6796; HASWELL-NEXT: retq # sched: [2:1.00]
6797;
Gadi Haber85d99b42017-10-17 13:45:39 +00006798; BROADWELL-LABEL: test_punpckhqdq:
6799; BROADWELL: # BB#0:
6800; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006801; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006802; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006803; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006804;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006805; SKYLAKE-LABEL: test_punpckhqdq:
6806; SKYLAKE: # BB#0:
6807; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006808; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6809; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6810; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006811;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006812; SKX-LABEL: test_punpckhqdq:
6813; SKX: # BB#0:
6814; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006815; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6816; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6817; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006818;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006819; ZNVER1-LABEL: test_punpckhqdq:
6820; ZNVER1: # BB#0:
6821; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25]
6822; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50]
6823; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
6824; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register interleave of %a0 and %a1.
6825 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6826 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
; Reads %a0 again rather than %1, so the two shuffles do not depend on each
; other; they are only joined by the add below.
6827 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6828 %4 = add <4 x i64> %1, %3
6829 ret <4 x i64> %4
6830}
6831
; test_punpckhwd: two chained <16 x i16> shuffles that interleave elements 4-7
; and 12-15 of their operands. The first shuffle takes both operands from
; registers (%a0, %a1); the second takes its second operand from the load of
; %a2. The per-CPU assertions below expect one register-form and one
; memory-form vpunpckhwd, each with its "sched" annotation.
6832define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6833; GENERIC-LABEL: test_punpckhwd:
6834; GENERIC: # BB#0:
6835; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6836; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [5:1.00]
6837; GENERIC-NEXT: retq # sched: [1:1.00]
6838;
6839; HASWELL-LABEL: test_punpckhwd:
6840; HASWELL: # BB#0:
6841; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6842; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00]
6843; HASWELL-NEXT: retq # sched: [2:1.00]
6844;
Gadi Haber85d99b42017-10-17 13:45:39 +00006845; BROADWELL-LABEL: test_punpckhwd:
6846; BROADWELL: # BB#0:
6847; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006848; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00]
6849; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006850;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006851; SKYLAKE-LABEL: test_punpckhwd:
6852; SKYLAKE: # BB#0:
6853; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006854; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6855; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006856;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006857; SKX-LABEL: test_punpckhwd:
6858; SKX: # BB#0:
6859; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006860; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6861; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006862;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006863; ZNVER1-LABEL: test_punpckhwd:
6864; ZNVER1: # BB#0:
6865; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25]
6866; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50]
6867; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register interleave of %a0 and %a1.
6868 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
; Same mask again, now with the loaded vector as the second operand.
6869 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6870 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
6871 ret <16 x i16> %3
6872}
6873
; test_punpcklbw: two chained <32 x i8> shuffles that interleave elements 0-7
; and 16-23 of their operands. The first shuffle takes both operands from
; registers (%a0, %a1); the second takes its second operand from the load of
; %a2. The per-CPU assertions below expect one register-form and one
; memory-form vpunpcklbw, each with its "sched" annotation.
6874define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6875; GENERIC-LABEL: test_punpcklbw:
6876; GENERIC: # BB#0:
6877; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6878; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [5:1.00]
6879; GENERIC-NEXT: retq # sched: [1:1.00]
6880;
6881; HASWELL-LABEL: test_punpcklbw:
6882; HASWELL: # BB#0:
6883; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6884; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00]
6885; HASWELL-NEXT: retq # sched: [2:1.00]
6886;
Gadi Haber85d99b42017-10-17 13:45:39 +00006887; BROADWELL-LABEL: test_punpcklbw:
6888; BROADWELL: # BB#0:
6889; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006890; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00]
6891; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006892;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006893; SKYLAKE-LABEL: test_punpcklbw:
6894; SKYLAKE: # BB#0:
6895; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006896; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6897; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006898;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006899; SKX-LABEL: test_punpcklbw:
6900; SKX: # BB#0:
6901; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006902; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6903; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006904;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006905; ZNVER1-LABEL: test_punpcklbw:
6906; ZNVER1: # BB#0:
6907; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25]
6908; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50]
6909; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register interleave of %a0 and %a1.
6910 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
; Same mask again, now with the loaded vector as the second operand.
6911 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6912 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
6913 ret <32 x i8> %3
6914}
6915
; test_punpckldq: two chained <8 x i32> shuffles that interleave elements 0-1
; and 4-5 of their operands (register operand first, then the vector loaded
; from %a2), followed by an add of a splat of 1. The per-CPU assertions below
; expect a register-form and a memory-form vpunpckldq, then vpcmpeqd + vpsubd
; for the add, each with its "sched" annotation.
6916define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6917; GENERIC-LABEL: test_punpckldq:
6918; GENERIC: # BB#0:
6919; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6920; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00]
6921; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
6922; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6923; GENERIC-NEXT: retq # sched: [1:1.00]
6924;
6925; HASWELL-LABEL: test_punpckldq:
6926; HASWELL: # BB#0:
6927; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6928; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00]
6929; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6930; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6931; HASWELL-NEXT: retq # sched: [2:1.00]
6932;
Gadi Haber85d99b42017-10-17 13:45:39 +00006933; BROADWELL-LABEL: test_punpckldq:
6934; BROADWELL: # BB#0:
6935; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006936; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006937; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6938; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006939; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006940;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006941; SKYLAKE-LABEL: test_punpckldq:
6942; SKYLAKE: # BB#0:
6943; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006944; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6945; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6946; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6947; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006948;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006949; SKX-LABEL: test_punpckldq:
6950; SKX: # BB#0:
6951; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006952; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6953; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6954; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6955; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006956;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006957; ZNVER1-LABEL: test_punpckldq:
6958; ZNVER1: # BB#0:
6959; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25]
6960; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
6961; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6962; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6963; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register interleave of %a0 and %a1.
6964 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
; Same mask again, now with the loaded vector as the second operand.
6965 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6966 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
; NOTE(review): this add presumably keeps the shuffles in the integer domain so
; that vpunpckldq (not a floating-point unpack) is selected - confirm.
6967 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
6968 ret <8 x i32> %4
6969}
6970
; test_punpcklqdq: two <4 x i64> shuffles that interleave elements 0 and 2 of
; %a0 with those of %a1 (register form) and with those of the vector loaded
; from %a2 (memory form); the two shuffle results are then added. The per-CPU
; assertions below expect two vpunpcklqdq instructions followed by vpaddq, each
; with its "sched" annotation.
6971define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6972; GENERIC-LABEL: test_punpcklqdq:
6973; GENERIC: # BB#0:
6974; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6975; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [5:1.00]
6976; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
6977; GENERIC-NEXT: retq # sched: [1:1.00]
6978;
6979; HASWELL-LABEL: test_punpcklqdq:
6980; HASWELL: # BB#0:
6981; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6982; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00]
6983; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6984; HASWELL-NEXT: retq # sched: [2:1.00]
6985;
Gadi Haber85d99b42017-10-17 13:45:39 +00006986; BROADWELL-LABEL: test_punpcklqdq:
6987; BROADWELL: # BB#0:
6988; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006989; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006990; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006991; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006992;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006993; SKYLAKE-LABEL: test_punpcklqdq:
6994; SKYLAKE: # BB#0:
6995; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006996; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6997; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6998; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006999;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007000; SKX-LABEL: test_punpcklqdq:
7001; SKX: # BB#0:
7002; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00007003; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
7004; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
7005; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007006;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007007; ZNVER1-LABEL: test_punpcklqdq:
7008; ZNVER1: # BB#0:
7009; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25]
7010; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50]
7011; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
7012; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register interleave of %a0 and %a1.
7013 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
7014 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
; Reads %a0 again rather than %1, so the two shuffles do not depend on each
; other; they are only joined by the add below.
7015 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
7016 %4 = add <4 x i64> %1, %3
7017 ret <4 x i64> %4
7018}
7019
; test_punpcklwd: two chained <16 x i16> shuffles that interleave elements 0-3
; and 8-11 of their operands. The first shuffle takes both operands from
; registers (%a0, %a1); the second takes its second operand from the load of
; %a2. The per-CPU assertions below expect one register-form and one
; memory-form vpunpcklwd, each with its "sched" annotation.
7020define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
7021; GENERIC-LABEL: test_punpcklwd:
7022; GENERIC: # BB#0:
7023; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7024; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [5:1.00]
7025; GENERIC-NEXT: retq # sched: [1:1.00]
7026;
7027; HASWELL-LABEL: test_punpcklwd:
7028; HASWELL: # BB#0:
7029; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7030; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00]
7031; HASWELL-NEXT: retq # sched: [2:1.00]
7032;
Gadi Haber85d99b42017-10-17 13:45:39 +00007033; BROADWELL-LABEL: test_punpcklwd:
7034; BROADWELL: # BB#0:
7035; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00007036; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00]
7037; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00007038;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007039; SKYLAKE-LABEL: test_punpcklwd:
7040; SKYLAKE: # BB#0:
7041; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00007042; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7043; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007044;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007045; SKX-LABEL: test_punpcklwd:
7046; SKX: # BB#0:
7047; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00007048; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7049; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007050;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007051; ZNVER1-LABEL: test_punpcklwd:
7052; ZNVER1: # BB#0:
7053; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25]
7054; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50]
7055; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register interleave of %a0 and %a1.
7056 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
; Same mask again, now with the loaded vector as the second operand.
7057 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
7058 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
7059 ret <16 x i16> %3
7060}
7061
; 256-bit integer XOR scheduling test. The IR xors %a0 with %a1, xors the
; result with a <4 x i64> loaded from %a2 (align 32), then adds %a1 before
; returning. The per-CPU assertions below expect a register-register vpxor,
; a vpxor with the load folded into it, a vpaddq and a retq, each with its
; "sched: [latency:throughput]" annotation.
; NOTE(review): the trailing add presumably keeps the xors in the integer
; execution domain so vpxor is selected -- confirm before changing it.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_pxor:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pxor:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BROADWELL-LABEL: test_pxor:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pxor:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pxor:
; SKX: # BB#0:
; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pxor:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = xor <4 x i64> %a0, %a1
  %2 = load <4 x i64>, <4 x i64> *%a2, align 32
  %3 = xor <4 x i64> %1, %2
  %4 = add <4 x i64> %3, %a1
  ret <4 x i64> %4
}
7110
; Metadata node !0 holding the single constant i32 1.
; NOTE(review): its users are outside this part of the file -- confirm what
; attaches to it before changing the value.
!0 = !{i32 1}