blob: 89903e7c5c92b389889a0f68a80ce9b0d5becc10 [file] [log] [blame]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
Simon Pilgrim84846982017-08-01 15:14:35 +00002; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
Simon Pilgrim946f08c2017-05-06 13:46:09 +00003; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
Gadi Haber85d99b42017-10-17 13:45:39 +00004; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
Gadi Haber767d98b2017-08-30 08:08:50 +00005; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
8
; test_broadcasti128: loads a <4 x i32> from %a1, repeats those four elements
; into both halves of an <8 x i32> (shuffle mask 0,1,2,3,0,1,2,3) and adds %a0.
; The autogenerated assertions expect, for every -mcpu in the header, a
; vbroadcasti128 from memory followed by vpaddd and retq, each carrying the
; per-CPU "sched" annotation emitted by -print-schedule.
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00009define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
10; GENERIC-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000011; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000012; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]
13; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
14; GENERIC-NEXT: retq # sched: [1:1.00]
15;
16; HASWELL-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000017; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +000018; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000019; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +000020; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000021;
Gadi Haber85d99b42017-10-17 13:45:39 +000022; BROADWELL-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000023; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +000024; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +000025; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +000026; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +000027;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000028; SKYLAKE-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000029; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +000030; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
31; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
32; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000033;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000034; SKX-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000035; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +000036; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
37; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
38; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000039;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000040; ZNVER1-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000041; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000042; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50]
43; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
44; ZNVER1-NEXT: retq # sched: [1:0.50]
45 %1 = load <4 x i32>, <4 x i32> *%a1, align 16
46 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
47 %3 = add <8 x i32> %2, %a0
48 ret <8 x i32> %3
49}
50
; test_broadcastsd_ymm: splats element 0 of the <2 x double> %a0 across a
; <4 x double> (zeroinitializer shuffle mask) and adds the result to itself.
; The assertions expect vbroadcastsd %xmm0, %ymm0 followed by vaddpd and retq
; for every -mcpu in the header.
; NOTE(review): the znver1 expectation for vbroadcastsd is [100:0.25]; the 100
; looks like a placeholder value rather than a modelled latency - confirm
; against the znver1 scheduler model before relying on it.
Simon Pilgrim5a931c62017-09-12 11:17:01 +000051define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) {
52; GENERIC-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000053; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000054; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00]
55; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
56; GENERIC-NEXT: retq # sched: [1:1.00]
57;
58; HASWELL-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000059; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000060; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
61; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +000062; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +000063;
Gadi Haber85d99b42017-10-17 13:45:39 +000064; BROADWELL-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000065; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +000066; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
67; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +000068; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +000069;
Simon Pilgrim5a931c62017-09-12 11:17:01 +000070; SKYLAKE-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000071; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000072; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +000073; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +000074; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +000075;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000076; SKX-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000077; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000078; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +000079; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
80; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000081;
Simon Pilgrim5a931c62017-09-12 11:17:01 +000082; ZNVER1-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000083; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000084; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25]
85; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
86; ZNVER1-NEXT: retq # sched: [1:0.50]
87 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
88 %2 = fadd <4 x double> %1, %1
89 ret <4 x double> %2
90}
91
; test_broadcastss: splats element 0 of the <4 x float> %a0 across all four
; lanes (zeroinitializer shuffle mask) and adds the result to itself.
; The assertions expect the 128-bit form vbroadcastss %xmm0, %xmm0 followed by
; vaddps and retq for every -mcpu in the header.
92define <4 x float> @test_broadcastss(<4 x float> %a0) {
93; GENERIC-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000094; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000095; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
96; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
97; GENERIC-NEXT: retq # sched: [1:1.00]
98;
99; HASWELL-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000100; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000101; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
102; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000103; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000104;
Gadi Haber85d99b42017-10-17 13:45:39 +0000105; BROADWELL-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000106; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000107; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
108; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000109; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000110;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000111; SKYLAKE-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000112; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000113; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000114; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000115; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000116;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000117; SKX-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000118; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000119; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000120; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
121; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000122;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000123; ZNVER1-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000124; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000125; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50]
126; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
127; ZNVER1-NEXT: retq # sched: [1:0.50]
128 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
129 %2 = fadd <4 x float> %1, %1
130 ret <4 x float> %2
131}
132
; test_broadcastss_ymm: splats element 0 of the <4 x float> %a0 across an
; <8 x float> (zeroinitializer shuffle mask) and adds the result to itself.
; The assertions expect the 256-bit form vbroadcastss %xmm0, %ymm0 followed by
; vaddps on ymm registers and retq for every -mcpu in the header.
; NOTE(review): the znver1 expectation for vbroadcastss is [100:0.25]; the 100
; looks like a placeholder value rather than a modelled latency - confirm
; against the znver1 scheduler model before relying on it.
133define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) {
134; GENERIC-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000135; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000136; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00]
137; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
138; GENERIC-NEXT: retq # sched: [1:1.00]
139;
140; HASWELL-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000141; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000142; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
143; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000144; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000145;
Gadi Haber85d99b42017-10-17 13:45:39 +0000146; BROADWELL-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000147; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000148; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
149; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000150; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000151;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000152; SKYLAKE-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000153; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000154; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000155; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000156; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000157;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000158; SKX-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000159; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000160; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000161; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
162; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000163;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000164; ZNVER1-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000165; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000166; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25]
167; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
168; ZNVER1-NEXT: retq # sched: [1:0.50]
169 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
170 %2 = fadd <8 x float> %1, %1
171 ret <8 x float> %2
172}
173
; test_extracti128: computes %a0 + %a1 and %a0 - %a1 on <8 x i32>, extracts
; elements 4..7 of each, stores the extracted sum through %a2 and returns the
; extracted difference. Using both a store and a return value makes the
; assertions cover the register form (vextracti128 $1, %ymm0, %xmm0) and the
; memory form (vextracti128 $1, %ymm2, (%rdi)). A vzeroupper is expected
; before retq for every -mcpu in the header.
; NOTE(review): vzeroupper is expected as [100:0.33] for -mcpu=x86-64 and
; [100:?] for znver1; these look like placeholder values - confirm against
; the respective scheduler models.
174define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) {
175; GENERIC-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000176; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000177; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [3:1.00]
178; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
179; GENERIC-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [1:1.00]
180; GENERIC-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
Simon Pilgrim4ff43d82017-12-10 13:41:29 +0000181; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000182; GENERIC-NEXT: retq # sched: [1:1.00]
183;
184; HASWELL-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000185; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000186; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
187; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
188; HASWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
189; HASWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
190; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000191; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000192;
Gadi Haber85d99b42017-10-17 13:45:39 +0000193; BROADWELL-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000194; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000195; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
196; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
197; BROADWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
198; BROADWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
199; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000200; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000201;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000202; SKYLAKE-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000203; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000204; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
205; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000206; SKYLAKE-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
207; SKYLAKE-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
208; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000209; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000210;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000211; SKX-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000212; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000213; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
214; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000215; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
216; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
217; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000218; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000219;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000220; ZNVER1-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000221; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000222; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.25]
223; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
224; ZNVER1-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [2:0.25]
225; ZNVER1-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:0.50]
226; ZNVER1-NEXT: vzeroupper # sched: [100:?]
227; ZNVER1-NEXT: retq # sched: [1:0.50]
228 %1 = add <8 x i32> %a0, %a1
229 %2 = sub <8 x i32> %a0, %a1
230 %3 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
231 %4 = shufflevector <8 x i32> %2, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
232 store <4 x i32> %3, <4 x i32> *%a2
233 ret <4 x i32> %4
234}
235
; test_gatherdpd: calls @llvm.x86.avx2.gather.d.pd with %a0, %a1, a <4 x i32>
; %a2, %a3 and an i8 immediate of 2, and returns the <2 x double> result.
; The assertions expect a single vgatherdpd with xmm operands whose address is
; (%rdi,%xmm1,2), i.e. the immediate 2 appears as the scale, then retq.
; NOTE(review): the -mcpu=x86-64 expectation is [4:0.50] and the znver1 one is
; [100:?], while the Intel models list 22-26 cycles; presumably neither of the
; former two has a gather-specific scheduling entry - confirm.
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000236define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
237; GENERIC-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000238; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000239; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000240; GENERIC-NEXT: retq # sched: [1:1.00]
241;
242; HASWELL-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000243; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000244; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
245; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000246;
Gadi Haber85d99b42017-10-17 13:45:39 +0000247; BROADWELL-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000248; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000249; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
250; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000251;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000252; SKYLAKE-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000253; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000254; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
255; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000256;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000257; SKX-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000258; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000259; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
260; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000261;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000262; ZNVER1-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000263; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000264; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
265; ZNVER1-NEXT: retq # sched: [1:0.50]
266 %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2)
267 ret <2 x double> %1
268}
269declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly
270
; test_gatherdpd_ymm: calls @llvm.x86.avx2.gather.d.pd.256 with %a0, %a1, a
; <4 x i32> %a2, %a3 and an i8 immediate of 8, and returns the <4 x double>
; result. The assertions expect a single vgatherdpd with ymm data operands but
; an xmm index register, addressed as (%rdi,%xmm1,8), then retq.
; NOTE(review): the -mcpu=x86-64 expectation is [4:0.50] and the znver1 one is
; [100:?], while the Intel models list 25-27 cycles; presumably neither of the
; former two has a gather-specific scheduling entry - confirm.
271define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
272; GENERIC-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000273; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000274; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000275; GENERIC-NEXT: retq # sched: [1:1.00]
276;
277; HASWELL-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000278; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000279; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [27:4.00]
280; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000281;
Gadi Haber85d99b42017-10-17 13:45:39 +0000282; BROADWELL-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000283; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000284; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00]
285; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000286;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000287; SKYLAKE-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000288; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000289; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
290; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000291;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000292; SKX-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000293; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000294; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
295; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000296;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000297; ZNVER1-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000298; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000299; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:?]
300; ZNVER1-NEXT: retq # sched: [1:0.50]
301 %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8)
302 ret <4 x double> %1
303}
304declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly
305
; test_gatherdps: calls @llvm.x86.avx2.gather.d.ps with %a0, %a1, a <4 x i32>
; %a2, %a3 and an i8 immediate of 2, and returns the <4 x float> result.
; The assertions expect a single vgatherdps with xmm operands whose address is
; (%rdi,%xmm1,2), i.e. the immediate 2 appears as the scale, then retq.
; NOTE(review): the -mcpu=x86-64 expectation is [4:0.50] and the znver1 one is
; [100:?], while the Intel models list 22-25 cycles; presumably neither of the
; former two has a gather-specific scheduling entry - confirm.
306define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
307; GENERIC-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000308; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000309; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000310; GENERIC-NEXT: retq # sched: [1:1.00]
311;
312; HASWELL-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000313; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000314; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67]
315; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000316;
Gadi Haber85d99b42017-10-17 13:45:39 +0000317; BROADWELL-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000318; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000319; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
320; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000321;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000322; SKYLAKE-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000323; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000324; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
325; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000326;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000327; SKX-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000328; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000329; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
330; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000331;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000332; ZNVER1-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000333; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000334; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
335; ZNVER1-NEXT: retq # sched: [1:0.50]
336 %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2)
337 ret <4 x float> %1
338}
339declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly
340
; test_gatherdps_ymm: calls @llvm.x86.avx2.gather.d.ps.256 with %a0, %a1, an
; <8 x i32> %a2, %a3 and an i8 immediate of 4, and returns the <8 x float>
; result. The assertions expect a single vgatherdps with ymm data and index
; operands, addressed as (%rdi,%ymm1,4), then retq.
; NOTE(review): the -mcpu=x86-64 expectation is [4:0.50] and the znver1 one is
; [100:?], while the Intel models list 25-27 cycles; presumably neither of the
; former two has a gather-specific scheduling entry - confirm.
341define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
342; GENERIC-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000343; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000344; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000345; GENERIC-NEXT: retq # sched: [1:1.00]
346;
347; HASWELL-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000348; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000349; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [27:6.50]
350; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000351;
Gadi Haber85d99b42017-10-17 13:45:39 +0000352; BROADWELL-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000353; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000354; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00]
355; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000356;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000357; SKYLAKE-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000358; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000359; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
360; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000361;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000362; SKX-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000363; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000364; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
365; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000366;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000367; ZNVER1-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000368; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000369; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:?]
370; ZNVER1-NEXT: retq # sched: [1:0.50]
371 %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4)
372 ret <8 x float> %1
373}
374declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly
375
; test_gatherqpd: calls @llvm.x86.avx2.gather.q.pd with %a0, %a1, a <2 x i64>
; %a2, %a3 and an i8 immediate of 2, and returns the <2 x double> result.
; The assertions expect a single vgatherqpd with xmm operands whose address is
; (%rdi,%xmm1,2), i.e. the immediate 2 appears as the scale, then retq.
; NOTE(review): the -mcpu=x86-64 expectation is [4:0.50] and the znver1 one is
; [100:?], while the Intel models list 22-23 cycles; presumably neither of the
; former two has a gather-specific scheduling entry - confirm.
376define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
377; GENERIC-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000378; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000379; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000380; GENERIC-NEXT: retq # sched: [1:1.00]
381;
382; HASWELL-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000383; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000384; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33]
385; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000386;
Gadi Haber85d99b42017-10-17 13:45:39 +0000387; BROADWELL-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000388; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000389; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00]
390; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000391;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000392; SKYLAKE-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000393; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000394; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
395; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000396;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000397; SKX-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000398; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000399; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
400; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000401;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000402; ZNVER1-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000403; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000404; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
405; ZNVER1-NEXT: retq # sched: [1:0.50]
406 %1 = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2)
407 ret <2 x double> %1
408}
409declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly
410
; test_gatherqpd_ymm: calls @llvm.x86.avx2.gather.q.pd.256 with %a0, %a1, a
; <4 x i64> %a2, %a3 and an i8 immediate of 8, and returns the <4 x double>
; result. The assertions expect a single vgatherqpd with ymm data and index
; operands, addressed as (%rdi,%ymm1,8), then retq.
; NOTE(review): the -mcpu=x86-64 expectation is [4:0.50] and the znver1 one is
; [100:?], while the Intel models list 23-25 cycles; presumably neither of the
; former two has a gather-specific scheduling entry - confirm.
411define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
412; GENERIC-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000413; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000414; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000415; GENERIC-NEXT: retq # sched: [1:1.00]
416;
417; HASWELL-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000418; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000419; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [24:5.00]
420; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000421;
Gadi Haber85d99b42017-10-17 13:45:39 +0000422; BROADWELL-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000423; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000424; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00]
425; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000426;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000427; SKYLAKE-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000428; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000429; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
430; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000431;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000432; SKX-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000433; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000434; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
435; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000436;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000437; ZNVER1-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000438; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000439; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:?]
440; ZNVER1-NEXT: retq # sched: [1:0.50]
441 %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8)
442 ret <4 x double> %1
443}
444declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly
445
; test_gatherqps: calls @llvm.x86.avx2.gather.q.ps with %a0, %a1, a <2 x i64>
; %a2, %a3 and an i8 immediate of 2, and returns the <4 x float> result.
; The assertions expect a single vgatherqps with xmm operands whose address is
; (%rdi,%xmm1,2), i.e. the immediate 2 appears as the scale, then retq.
; NOTE(review): the -mcpu=x86-64 expectation is [4:0.50] and the znver1 one is
; [100:?], while the Intel models list 22-27 cycles; presumably neither of the
; former two has a gather-specific scheduling entry - confirm.
446define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
447; GENERIC-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000448; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000449; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000450; GENERIC-NEXT: retq # sched: [1:1.00]
451;
452; HASWELL-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000453; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000454; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67]
455; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000456;
Gadi Haber85d99b42017-10-17 13:45:39 +0000457; BROADWELL-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000458; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000459; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00]
460; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000461;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000462; SKYLAKE-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000463; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000464; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
465; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000466;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000467; SKX-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000468; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000469; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
470; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000471;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000472; ZNVER1-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000473; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000474; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
475; ZNVER1-NEXT: retq # sched: [1:0.50]
476 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2)
477 ret <4 x float> %1
478}
479declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly
480
; test_gatherqps_ymm: returns the result of @llvm.x86.avx2.gather.q.ps.256 called
; with a <4 x i64> index operand and a trailing i8 4 (the scale in the expected
; (%rdi,%ymm1,4) address). Every CPU configuration below also expects a
; vzeroupper between the gather and retq. Assertions are autogenerated (see the
; NOTE at the top of the file).
481define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
482; GENERIC-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000483; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000484; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [4:0.50]
Simon Pilgrim4ff43d82017-12-10 13:41:29 +0000485; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000486; GENERIC-NEXT: retq # sched: [1:1.00]
487;
488; HASWELL-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000489; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000490; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [28:3.67]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000491; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000492; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000493;
Gadi Haber85d99b42017-10-17 13:45:39 +0000494; BROADWELL-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000495; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000496; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000497; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000498; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000499;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000500; SKYLAKE-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000501; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000502; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000503; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000504; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000505;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000506; SKX-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000507; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000508; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000509; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000510; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000511;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000512; ZNVER1-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000513; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000514; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:?]
515; ZNVER1-NEXT: vzeroupper # sched: [100:?]
516; ZNVER1-NEXT: retq # sched: [1:0.50]
517 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4)
518 ret <4 x float> %1
519}
520declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly
521
; test_inserti128: builds two <8 x i32> values whose low four lanes come from %a0
; and whose high four lanes come from a 128-bit source (%a1 in a register, then
; the vector loaded from %a2), using shufflevector with mask <0,1,2,3,8,9,10,11>,
; and returns their sum. The assertions below expect the register and (%rdi)
; forms of vinserti128 $1 followed by vpaddd; they are autogenerated (see the
; NOTE at the top of the file).
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000522define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
523; GENERIC-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000524; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000525; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
526; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
527; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
528; GENERIC-NEXT: retq # sched: [1:1.00]
529;
530; HASWELL-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000531; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000532; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000533; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000534; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000535; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000536;
Gadi Haber85d99b42017-10-17 13:45:39 +0000537; BROADWELL-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000538; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000539; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000540; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000541; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000542; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000543;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000544; SKYLAKE-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000545; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000546; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000547; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
548; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
549; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000550;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000551; SKX-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000552; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000553; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000554; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
555; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
556; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000557;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000558; ZNVER1-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000559; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000560; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25]
561; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
562; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
563; ZNVER1-NEXT: retq # sched: [1:0.50]
564 %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
565 %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
566 %3 = load <4 x i32>, <4 x i32> *%a2, align 16
567 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
568 %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
569 %6 = add <8 x i32> %2, %5
570 ret <8 x i32> %6
571}
572
; test_movntdqa: returns the <4 x i64> produced by @llvm.x86.avx2.movntdqa on the
; pointer argument %a0. The assertions below expect a single
; vmovntdqa (%rdi), %ymm0 per CPU configuration, with its "# sched" annotation;
; they are autogenerated (see the NOTE at the top of the file).
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000573define <4 x i64> @test_movntdqa(i8* %a0) {
574; GENERIC-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000575; GENERIC: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000576; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [4:0.50]
577; GENERIC-NEXT: retq # sched: [1:1.00]
578;
579; HASWELL-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000580; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000581; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
582; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000583;
Gadi Haber85d99b42017-10-17 13:45:39 +0000584; BROADWELL-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000585; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000586; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50]
587; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000588;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000589; SKYLAKE-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000590; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000591; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
592; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000593;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000594; SKX-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000595; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000596; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
597; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000598;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000599; ZNVER1-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000600; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000601; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50]
602; ZNVER1-NEXT: retq # sched: [1:0.50]
603 %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0)
604 ret <4 x i64> %1
605}
606declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
607
; test_mpsadbw: chains two @llvm.x86.avx2.mpsadbw calls with immediate 7. The
; first takes %a0 and %a1; the second takes that result (bitcast back to
; <32 x i8>) and the vector loaded from %a2. The assertions below expect the
; register form followed by the (%rdi) memory form of vmpsadbw $7; they are
; autogenerated (see the NOTE at the top of the file).
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000608define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
609; GENERIC-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000610; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000611; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
612; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
613; GENERIC-NEXT: retq # sched: [1:1.00]
614;
615; HASWELL-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000616; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000617; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000618; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00]
619; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000620;
Gadi Haber85d99b42017-10-17 13:45:39 +0000621; BROADWELL-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000622; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000623; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000624; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
625; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000626;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000627; SKYLAKE-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000628; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000629; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000630; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
631; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000632;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000633; SKX-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000634; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000635; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000636; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
637; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000638;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000639; ZNVER1-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000640; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000641; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:?]
642; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:?]
643; ZNVER1-NEXT: retq # sched: [1:0.50]
644 %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
645 %2 = bitcast <16 x i16> %1 to <32 x i8>
646 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
647 %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7)
648 ret <16 x i16> %4
649}
650declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
651
; test_pabsb: applies @llvm.x86.avx2.pabs.b to %a0 and to the <32 x i8> loaded
; from %a1, then ORs the two results. The assertions below expect a register
; vpabsb, a (%rdi) vpabsb and a vpor; note that the znver1 expectations list the
; memory form first. Assertions are autogenerated (see the NOTE at the top of
; the file).
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000652define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000653; GENERIC-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000654; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000655; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00]
656; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:1.00]
657; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
658; GENERIC-NEXT: retq # sched: [1:1.00]
659;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000660; HASWELL-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000661; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000662; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000663; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000664; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000665; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000666;
Gadi Haber85d99b42017-10-17 13:45:39 +0000667; BROADWELL-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000668; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000669; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000670; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000671; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +0000672; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000673;
Gadi Haber767d98b2017-08-30 08:08:50 +0000674; SKYLAKE-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000675; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000676; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
677; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
678; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
679; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000680;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000681; SKX-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000682; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000683; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
684; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
685; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
686; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000687;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000688; ZNVER1-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000689; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +0000690; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
691; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
692; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000693; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000694 %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
695 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
696 %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
697 %4 = or <32 x i8> %1, %3
698 ret <32 x i8> %4
699}
700declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
701
; test_pabsd: applies @llvm.x86.avx2.pabs.d to %a0 and to the <8 x i32> loaded
; from %a1, then ORs the two results. The assertions below expect a register
; vpabsd, a (%rdi) vpabsd and a vpor; note that the znver1 expectations list the
; memory form first. Assertions are autogenerated (see the NOTE at the top of
; the file).
702define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000703; GENERIC-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000704; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000705; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00]
706; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:1.00]
707; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
708; GENERIC-NEXT: retq # sched: [1:1.00]
709;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000710; HASWELL-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000711; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000712; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000713; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000714; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000715; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000716;
Gadi Haber85d99b42017-10-17 13:45:39 +0000717; BROADWELL-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000718; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000719; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000720; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000721; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +0000722; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000723;
Gadi Haber767d98b2017-08-30 08:08:50 +0000724; SKYLAKE-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000725; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000726; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
727; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
728; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
729; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000730;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000731; SKX-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000732; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000733; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
734; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
735; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
736; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000737;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000738; ZNVER1-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000739; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +0000740; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
741; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
742; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000743; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000744 %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
745 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
746 %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
747 %4 = or <8 x i32> %1, %3
748 ret <8 x i32> %4
749}
750declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
751
; test_pabsw: applies @llvm.x86.avx2.pabs.w to %a0 and to the <16 x i16> loaded
; from %a1, then ORs the two results. The assertions below expect a register
; vpabsw, a (%rdi) vpabsw and a vpor; note that the znver1 expectations list the
; memory form first. Assertions are autogenerated (see the NOTE at the top of
; the file).
752define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000753; GENERIC-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000754; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000755; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00]
756; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:1.00]
757; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
758; GENERIC-NEXT: retq # sched: [1:1.00]
759;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000760; HASWELL-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000761; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000762; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000763; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000764; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000765; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000766;
Gadi Haber85d99b42017-10-17 13:45:39 +0000767; BROADWELL-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000768; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000769; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000770; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000771; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +0000772; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000773;
Gadi Haber767d98b2017-08-30 08:08:50 +0000774; SKYLAKE-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000775; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000776; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
777; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
778; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
779; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000780;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000781; SKX-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000782; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000783; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
784; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
785; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
786; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000787;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000788; ZNVER1-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000789; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +0000790; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
791; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
792; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000793; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000794 %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
795 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
796 %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
797 %4 = or <16 x i16> %1, %3
798 ret <16 x i16> %4
799}
800declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
801
; test_packssdw: chains two @llvm.x86.avx2.packssdw calls. The first takes %a0
; and %a1; the second takes that result (bitcast back to <8 x i32>) and the
; vector loaded from %a2. The assertions below expect the register form followed
; by the (%rdi) memory form of vpackssdw; they are autogenerated (see the NOTE
; at the top of the file).
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000802define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
803; GENERIC-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000804; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000805; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
806; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
807; GENERIC-NEXT: retq # sched: [1:1.00]
808;
809; HASWELL-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000810; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000811; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000812; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
813; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000814;
Gadi Haber85d99b42017-10-17 13:45:39 +0000815; BROADWELL-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000816; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000817; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000818; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
819; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000820;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000821; SKYLAKE-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000822; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000823; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000824; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
825; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000826;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000827; SKX-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000828; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000829; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000830; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
831; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000832;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000833; ZNVER1-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000834; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000835; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
836; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
837; ZNVER1-NEXT: retq # sched: [1:0.50]
838 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
839 %2 = bitcast <16 x i16> %1 to <8 x i32>
840 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
841 %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3)
842 ret <16 x i16> %4
843}
844declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
845
; test_packsswb: chains two @llvm.x86.avx2.packsswb calls. The first takes %a0
; and %a1; the second takes that result (bitcast back to <16 x i16>) and the
; vector loaded from %a2. The assertions below expect the register form followed
; by the (%rdi) memory form of vpacksswb; they are autogenerated (see the NOTE
; at the top of the file).
846define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
847; GENERIC-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000848; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000849; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
850; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
851; GENERIC-NEXT: retq # sched: [1:1.00]
852;
853; HASWELL-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000854; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000855; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000856; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
857; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000858;
Gadi Haber85d99b42017-10-17 13:45:39 +0000859; BROADWELL-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000860; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000861; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000862; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
863; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000864;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000865; SKYLAKE-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000866; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000867; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000868; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
869; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000870;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000871; SKX-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000872; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000873; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000874; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
875; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000876;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000877; ZNVER1-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000878; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000879; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
880; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
881; ZNVER1-NEXT: retq # sched: [1:0.50]
882 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
883 %2 = bitcast <32 x i8> %1 to <16 x i16>
884 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
885 %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3)
886 ret <32 x i8> %4
887}
888declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
889
; test_packusdw: chains two @llvm.x86.avx2.packusdw calls. The first takes %a0
; and %a1; the second takes that result (bitcast back to <8 x i32>) and the
; vector loaded from %a2. The assertions below expect the register form followed
; by the (%rdi) memory form of vpackusdw; they are autogenerated (see the NOTE
; at the top of the file).
890define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
891; GENERIC-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000892; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000893; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
894; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
895; GENERIC-NEXT: retq # sched: [1:1.00]
896;
897; HASWELL-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000898; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000899; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000900; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
901; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000902;
Gadi Haber85d99b42017-10-17 13:45:39 +0000903; BROADWELL-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000904; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000905; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000906; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
907; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000908;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000909; SKYLAKE-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000910; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000911; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000912; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
913; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000914;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000915; SKX-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000916; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000917; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000918; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
919; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000920;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000921; ZNVER1-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000922; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000923; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
924; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
925; ZNVER1-NEXT: retq # sched: [1:0.50]
926 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
927 %2 = bitcast <16 x i16> %1 to <8 x i32>
928 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
929 %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3)
930 ret <16 x i16> %4
931}
932declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
933
; test_packuswb - calls @llvm.x86.avx2.packuswb on %a0 and %a1, bitcasts the
; <32 x i8> result back to <16 x i16>, then packs it again with a
; 32-byte-aligned load from %a2. The expected assembly for every CPU run is one
; register-form and one (%rdi) memory-form vpackuswb followed by retq, each with
; its sched annotation.
934define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
935; GENERIC-LABEL: test_packuswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000936; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000937; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
938; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
939; GENERIC-NEXT: retq # sched: [1:1.00]
940;
941; HASWELL-LABEL: test_packuswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000942; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000943; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000944; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
945; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000946;
Gadi Haber85d99b42017-10-17 13:45:39 +0000947; BROADWELL-LABEL: test_packuswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000948; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000949; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000950; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
951; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000952;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000953; SKYLAKE-LABEL: test_packuswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000954; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000955; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000956; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
957; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000958;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000959; SKX-LABEL: test_packuswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000960; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000961; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000962; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
963; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000964;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000965; ZNVER1-LABEL: test_packuswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000966; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000967; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
968; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
969; ZNVER1-NEXT: retq # sched: [1:0.50]
970 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
971 %2 = bitcast <32 x i8> %1 to <16 x i16>
972 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
973 %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3)
974 ret <32 x i8> %4
975}
976declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
977
; test_paddb - adds %a0 and %a1 with a plain IR add on <32 x i8>, then adds a
; 32-byte-aligned load from %a2 to that sum. The expected assembly for every
; CPU run is one register-form and one (%rdi) memory-form vpaddb followed by
; retq, each with its sched annotation.
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000978define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000979; GENERIC-LABEL: test_paddb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000980; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000981; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
982; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
983; GENERIC-NEXT: retq # sched: [1:1.00]
984;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000985; HASWELL-LABEL: test_paddb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000986; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000987; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000988; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
989; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000990;
Gadi Haber85d99b42017-10-17 13:45:39 +0000991; BROADWELL-LABEL: test_paddb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000992; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000993; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000994; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
995; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000996;
Gadi Haber767d98b2017-08-30 08:08:50 +0000997; SKYLAKE-LABEL: test_paddb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000998; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000999; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1000; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1001; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001002;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001003; SKX-LABEL: test_paddb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001004; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001005; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1006; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1007; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001008;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001009; ZNVER1-LABEL: test_paddb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001010; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00001011; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1012; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001013; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001014 %1 = add <32 x i8> %a0, %a1
1015 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1016 %3 = add <32 x i8> %1, %2
1017 ret <32 x i8> %3
1018}
1019
; test_paddd - adds %a0 and %a1 with a plain IR add on <8 x i32>, then adds a
; 32-byte-aligned load from %a2 to that sum. The expected assembly for every
; CPU run is one register-form and one (%rdi) memory-form vpaddd followed by
; retq, each with its sched annotation.
1020define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001021; GENERIC-LABEL: test_paddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001022; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001023; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1024; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1025; GENERIC-NEXT: retq # sched: [1:1.00]
1026;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001027; HASWELL-LABEL: test_paddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001028; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001029; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001030; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1031; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001032;
Gadi Haber85d99b42017-10-17 13:45:39 +00001033; BROADWELL-LABEL: test_paddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001034; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001035; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001036; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1037; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001038;
Gadi Haber767d98b2017-08-30 08:08:50 +00001039; SKYLAKE-LABEL: test_paddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001040; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001041; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1042; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1043; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001044;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001045; SKX-LABEL: test_paddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001046; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001047; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1048; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1049; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001050;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001051; ZNVER1-LABEL: test_paddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001052; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00001053; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1054; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001055; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001056 %1 = add <8 x i32> %a0, %a1
1057 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
1058 %3 = add <8 x i32> %1, %2
1059 ret <8 x i32> %3
1060}
1061
; test_paddq - adds %a0 and %a1 with a plain IR add on <4 x i64>, then adds a
; 32-byte-aligned load from %a2 to that sum. The expected assembly for every
; CPU run is one register-form and one (%rdi) memory-form vpaddq followed by
; retq, each with its sched annotation.
1062define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001063; GENERIC-LABEL: test_paddq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001064; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001065; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1066; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1067; GENERIC-NEXT: retq # sched: [1:1.00]
1068;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001069; HASWELL-LABEL: test_paddq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001070; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001071; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001072; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1073; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001074;
Gadi Haber85d99b42017-10-17 13:45:39 +00001075; BROADWELL-LABEL: test_paddq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001076; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001077; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001078; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1079; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001080;
Gadi Haber767d98b2017-08-30 08:08:50 +00001081; SKYLAKE-LABEL: test_paddq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001082; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001083; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1084; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1085; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001086;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001087; SKX-LABEL: test_paddq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001088; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001089; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1090; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1091; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001092;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001093; ZNVER1-LABEL: test_paddq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001094; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00001095; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1096; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001097; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001098 %1 = add <4 x i64> %a0, %a1
1099 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
1100 %3 = add <4 x i64> %1, %2
1101 ret <4 x i64> %3
1102}
1103
; test_paddsb - calls @llvm.x86.avx2.padds.b on %a0 and %a1, then calls it
; again on that result and a 32-byte-aligned load from %a2. The expected
; assembly for every CPU run is one register-form and one (%rdi) memory-form
; vpaddsb followed by retq, each with its sched annotation.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001104define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1105; GENERIC-LABEL: test_paddsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001106; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001107; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1108; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1109; GENERIC-NEXT: retq # sched: [1:1.00]
1110;
1111; HASWELL-LABEL: test_paddsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001112; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001113; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001114; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1115; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001116;
Gadi Haber85d99b42017-10-17 13:45:39 +00001117; BROADWELL-LABEL: test_paddsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001118; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001119; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001120; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1121; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001122;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001123; SKYLAKE-LABEL: test_paddsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001124; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001125; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1126; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1127; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001128;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001129; SKX-LABEL: test_paddsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001130; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001131; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1132; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1133; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001134;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001135; ZNVER1-LABEL: test_paddsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001136; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001137; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1138; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1139; ZNVER1-NEXT: retq # sched: [1:0.50]
1140 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1)
1141 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1142 %3 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %1, <32 x i8> %2)
1143 ret <32 x i8> %3
1144}
1145declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
1146
; test_paddsw - calls @llvm.x86.avx2.padds.w on %a0 and %a1, then calls it
; again on that result and a 32-byte-aligned load from %a2. The expected
; assembly for every CPU run is one register-form and one (%rdi) memory-form
; vpaddsw followed by retq, each with its sched annotation.
1147define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1148; GENERIC-LABEL: test_paddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001149; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001150; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1151; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1152; GENERIC-NEXT: retq # sched: [1:1.00]
1153;
1154; HASWELL-LABEL: test_paddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001155; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001156; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001157; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1158; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001159;
Gadi Haber85d99b42017-10-17 13:45:39 +00001160; BROADWELL-LABEL: test_paddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001161; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001162; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001163; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1164; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001165;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001166; SKYLAKE-LABEL: test_paddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001167; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001168; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1169; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1170; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001171;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001172; SKX-LABEL: test_paddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001173; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001174; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1175; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1176; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001177;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001178; ZNVER1-LABEL: test_paddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001179; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001180; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1181; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1182; ZNVER1-NEXT: retq # sched: [1:0.50]
1183 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1)
1184 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1185 %3 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %1, <16 x i16> %2)
1186 ret <16 x i16> %3
1187}
1188declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
1189
; test_paddusb - calls @llvm.x86.avx2.paddus.b on %a0 and %a1, then calls it
; again on that result and a 32-byte-aligned load from %a2. The expected
; assembly for every CPU run is one register-form and one (%rdi) memory-form
; vpaddusb followed by retq, each with its sched annotation.
1190define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1191; GENERIC-LABEL: test_paddusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001192; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001193; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1194; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1195; GENERIC-NEXT: retq # sched: [1:1.00]
1196;
1197; HASWELL-LABEL: test_paddusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001198; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001199; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001200; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1201; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001202;
Gadi Haber85d99b42017-10-17 13:45:39 +00001203; BROADWELL-LABEL: test_paddusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001204; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001205; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001206; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1207; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001208;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001209; SKYLAKE-LABEL: test_paddusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001210; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001211; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1212; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1213; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001214;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001215; SKX-LABEL: test_paddusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001216; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001217; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1218; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1219; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001220;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001221; ZNVER1-LABEL: test_paddusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001222; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001223; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1224; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1225; ZNVER1-NEXT: retq # sched: [1:0.50]
1226 %1 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1)
1227 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1228 %3 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %1, <32 x i8> %2)
1229 ret <32 x i8> %3
1230}
1231declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
1232
; test_paddusw - calls @llvm.x86.avx2.paddus.w on %a0 and %a1, then calls it
; again on that result and a 32-byte-aligned load from %a2. The expected
; assembly for every CPU run is one register-form and one (%rdi) memory-form
; vpaddusw followed by retq, each with its sched annotation.
1233define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1234; GENERIC-LABEL: test_paddusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001235; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001236; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1237; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1238; GENERIC-NEXT: retq # sched: [1:1.00]
1239;
1240; HASWELL-LABEL: test_paddusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001241; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001242; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001243; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1244; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001245;
Gadi Haber85d99b42017-10-17 13:45:39 +00001246; BROADWELL-LABEL: test_paddusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001247; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001248; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001249; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1250; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001251;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001252; SKYLAKE-LABEL: test_paddusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001253; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001254; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1255; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1256; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001257;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001258; SKX-LABEL: test_paddusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001259; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001260; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1261; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1262; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001263;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001264; ZNVER1-LABEL: test_paddusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001265; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001266; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1267; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1268; ZNVER1-NEXT: retq # sched: [1:0.50]
1269 %1 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1)
1270 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1271 %3 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %1, <16 x i16> %2)
1272 ret <16 x i16> %3
1273}
1274declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
1275
; test_paddw - adds %a0 and %a1 with a plain IR add on <16 x i16>, then adds a
; 32-byte-aligned load from %a2 to that sum. The expected assembly for every
; CPU run is one register-form and one (%rdi) memory-form vpaddw followed by
; retq, each with its sched annotation.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001276define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001277; GENERIC-LABEL: test_paddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001278; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001279; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1280; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1281; GENERIC-NEXT: retq # sched: [1:1.00]
1282;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001283; HASWELL-LABEL: test_paddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001284; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001285; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001286; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1287; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001288;
Gadi Haber85d99b42017-10-17 13:45:39 +00001289; BROADWELL-LABEL: test_paddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001290; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001291; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001292; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1293; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001294;
Gadi Haber767d98b2017-08-30 08:08:50 +00001295; SKYLAKE-LABEL: test_paddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001296; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001297; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1298; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1299; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001300;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001301; SKX-LABEL: test_paddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001302; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001303; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1304; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1305; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001306;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001307; ZNVER1-LABEL: test_paddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001308; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00001309; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1310; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001311; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001312 %1 = add <16 x i16> %a0, %a1
1313 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1314 %3 = add <16 x i16> %1, %2
1315 ret <16 x i16> %3
1316}
1317
; test_palignr - builds %1 as a shufflevector of %a1 and %a0, builds %3 as a
; shufflevector of %a0 and %1 with the same mask, and returns %1 + %3. The
; expected assembly for every CPU run is two register-form vpalignr, one vpaddb
; and retq, each with its sched annotation.
;
; NOTE(review): %2 is loaded from %a2 but never used; the second shufflevector
; takes %1 as its second operand instead. As a result the expected assembly
; contains no (%rdi) memory-operand vpalignr, unlike the neighbouring tests
; which exercise both a register form and a memory form. Confirm whether this
; is intentional; if the load is wired into the shuffle, the assertions must be
; regenerated with utils/update_llc_test_checks.py.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001318define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1319; GENERIC-LABEL: test_palignr:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001320; GENERIC: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001321; GENERIC-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1322; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1323; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001324; GENERIC-NEXT: retq # sched: [1:1.00]
1325;
1326; HASWELL-LABEL: test_palignr:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001327; HASWELL: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001328; HASWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1329; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1330; HASWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001331; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001332;
Gadi Haber85d99b42017-10-17 13:45:39 +00001333; BROADWELL-LABEL: test_palignr:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001334; BROADWELL: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001335; BROADWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1336; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1337; BROADWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001338; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001339;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001340; SKYLAKE-LABEL: test_palignr:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001341; SKYLAKE: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001342; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1343; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1344; SKYLAKE-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001345; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001346;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001347; SKX-LABEL: test_palignr:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001348; SKX: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001349; SKX-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1350; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1351; SKX-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00001352; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001353;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001354; ZNVER1-LABEL: test_palignr:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001355; ZNVER1: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001356; ZNVER1-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25]
1357; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:0.25]
1358; ZNVER1-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001359; ZNVER1-NEXT: retq # sched: [1:0.50]
1360 %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
1361 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001362 %3 = shufflevector <32 x i8> %a0, <32 x i8> %1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
1363 %4 = add <32 x i8> %1, %3
1364 ret <32 x i8> %4
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001365}
1366
; test_pand: ANDs %a0 with %a1, ANDs that result with a <4 x i64> loaded from
; %a2 (align 32), then adds %a1. For every check prefix the expected assembly
; is a register-form vpand, a vpand with a (%rdi) memory operand, a vpaddq and
; retq, each carrying its "sched:" annotation.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001367define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001368; GENERIC-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001369; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001370; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1371; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
1372; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1373; GENERIC-NEXT: retq # sched: [1:1.00]
1374;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001375; HASWELL-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001376; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001377; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001378; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001379; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001380; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001381;
Gadi Haber85d99b42017-10-17 13:45:39 +00001382; BROADWELL-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001383; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001384; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001385; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001386; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001387; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001388;
Gadi Haber767d98b2017-08-30 08:08:50 +00001389; SKYLAKE-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001390; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001391; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1392; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1393; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1394; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001395;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001396; SKX-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001397; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001398; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1399; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1400; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1401; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001402;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001403; ZNVER1-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001404; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00001405; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1406; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1407; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001408; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: register AND, AND with the loaded vector, then an integer add.
; NOTE(review): the trailing add of %a1 presumably keeps the logic ops in the
; integer domain so vpand (not a float-domain AND) is selected - confirm.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001409 %1 = and <4 x i64> %a0, %a1
1410 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
1411 %3 = and <4 x i64> %1, %2
1412 %4 = add <4 x i64> %3, %a1
1413 ret <4 x i64> %4
1414}
1415
; test_pandn: computes %a1 & ~%a0, then (load %a2) & ~(that result), and adds
; the two values. For every check prefix the expected assembly is a
; register-form vpandn, a vpandn with a (%rdi) memory operand writing %ymm1,
; a vpaddq and retq, each carrying its "sched:" annotation.
1416define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001417; GENERIC-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001418; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001419; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1420; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:1.00]
1421; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1422; GENERIC-NEXT: retq # sched: [1:1.00]
1423;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001424; HASWELL-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001425; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001426; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001427; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001428; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001429; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001430;
Gadi Haber85d99b42017-10-17 13:45:39 +00001431; BROADWELL-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001432; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001433; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001434; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001435; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001436; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001437;
Gadi Haber767d98b2017-08-30 08:08:50 +00001438; SKYLAKE-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001439; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001440; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1441; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1442; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1443; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001444;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001445; SKX-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001446; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001447; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1448; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1449; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1450; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001451;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001452; ZNVER1-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001453; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00001454; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1455; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1456; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001457; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: each and-not is spelled as xor with all-ones followed by and.
; %2 feeds both the second and-not and the final add.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001458 %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
1459 %2 = and <4 x i64> %a1, %1
1460 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
1461 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
1462 %5 = and <4 x i64> %3, %4
1463 %6 = add <4 x i64> %2, %5
1464 ret <4 x i64> %6
1465}
1466
; test_pavgb: applies the widened (x + y + 1) >> 1 pattern to %a0 and %a1, then
; applies it again to that result and a <32 x i8> loaded from %a2 (align 32).
; For every check prefix the expected assembly is a register-form vpavgb, a
; vpavgb with a (%rdi) memory operand and retq, each with a "sched:" annotation.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001467define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1468; GENERIC-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001469; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001470; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1471; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1472; GENERIC-NEXT: retq # sched: [1:1.00]
1473;
1474; HASWELL-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001475; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001476; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001477; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1478; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001479;
Gadi Haber85d99b42017-10-17 13:45:39 +00001480; BROADWELL-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001481; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001482; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001483; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1484; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001485;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001486; SKYLAKE-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001487; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001488; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1489; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1490; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001491;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001492; SKX-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001493; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001494; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1495; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1496; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001497;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001498; ZNVER1-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001499; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001500; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1501; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1502; ZNVER1-NEXT: retq # sched: [1:0.50]
; First pattern (%1-%6): zext both inputs to i16, add, add 1, lshr 1, trunc to i8.
1503 %1 = zext <32 x i8> %a0 to <32 x i16>
1504 %2 = zext <32 x i8> %a1 to <32 x i16>
1505 %3 = add <32 x i16> %1, %2
1506 %4 = add <32 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1507 %5 = lshr <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1508 %6 = trunc <32 x i16> %5 to <32 x i8>
; Second pattern (%8-%13): the same sequence applied to %6 and the loaded vector %7.
1509 %7 = load <32 x i8>, <32 x i8> *%a2, align 32
1510 %8 = zext <32 x i8> %6 to <32 x i16>
1511 %9 = zext <32 x i8> %7 to <32 x i16>
1512 %10 = add <32 x i16> %8, %9
1513 %11 = add <32 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1514 %12 = lshr <32 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1515 %13 = trunc <32 x i16> %12 to <32 x i8>
1516 ret <32 x i8> %13
1517}
1518
; test_pavgw: applies the widened (x + y + 1) >> 1 pattern to %a0 and %a1, then
; applies it again to that result and a <16 x i16> loaded from %a2 (align 32).
; For every check prefix the expected assembly is a register-form vpavgw, a
; vpavgw with a (%rdi) memory operand and retq, each with a "sched:" annotation.
1519define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1520; GENERIC-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001521; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001522; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1523; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1524; GENERIC-NEXT: retq # sched: [1:1.00]
1525;
1526; HASWELL-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001527; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001528; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001529; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1530; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001531;
Gadi Haber85d99b42017-10-17 13:45:39 +00001532; BROADWELL-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001533; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001534; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001535; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1536; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001537;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001538; SKYLAKE-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001539; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001540; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1541; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1542; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001543;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001544; SKX-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001545; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001546; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1547; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1548; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001549;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001550; ZNVER1-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001551; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001552; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1553; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1554; ZNVER1-NEXT: retq # sched: [1:0.50]
; First pattern (%1-%6): zext both inputs to i32, add, add 1, lshr 1, trunc to i16.
1555 %1 = zext <16 x i16> %a0 to <16 x i32>
1556 %2 = zext <16 x i16> %a1 to <16 x i32>
1557 %3 = add <16 x i32> %1, %2
1558 %4 = add <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1559 %5 = lshr <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1560 %6 = trunc <16 x i32> %5 to <16 x i16>
; Second pattern (%8-%13): the same sequence applied to %6 and the loaded vector %7.
1561 %7 = load <16 x i16>, <16 x i16> *%a2, align 32
1562 %8 = zext <16 x i16> %6 to <16 x i32>
1563 %9 = zext <16 x i16> %7 to <16 x i32>
1564 %10 = add <16 x i32> %8, %9
1565 %11 = add <16 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1566 %12 = lshr <16 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1567 %13 = trunc <16 x i32> %12 to <16 x i16>
1568 ret <16 x i16> %13
1569}
1570
; test_pblendd: 128-bit dword blends written as shufflevector. The first
; blends %a0 with %a1; the second blends %a1 with a <4 x i32> loaded from %a2
; (align 16); the two blends are then added. For every check prefix the
; expected assembly is a register-form vpblendd, a vpblendd with a memory
; source, a vpaddd and retq, each with a "sched:" annotation.
1571define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1572; GENERIC-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001573; GENERIC: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001574; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001575; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [5:0.50]
1576; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1577; GENERIC-NEXT: retq # sched: [1:1.00]
1578;
1579; HASWELL-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001580; HASWELL: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001581; HASWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001582; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001583; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001584; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001585;
Gadi Haber85d99b42017-10-17 13:45:39 +00001586; BROADWELL-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001587; BROADWELL: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001588; BROADWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001589; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001590; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001591; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001592;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001593; SKYLAKE-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001594; SKYLAKE: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001595; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001596; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1597; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1598; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001599;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001600; SKX-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001601; SKX: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001602; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00001603; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1604; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1605; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001606;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001607; ZNVER1-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001608; ZNVER1: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001609; ZNVER1-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001610; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00]
1611; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1612; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 0-3 select from the first shuffle operand, 4-7 from the second.
; %1 takes elements 0-2 from %a1 and element 3 from %a0; %3 takes elements 0
; and 2 from the loaded vector and elements 1 and 3 from %a1.
1613 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
1614 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001615 %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1616 %4 = add <4 x i32> %1, %3
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001617 ret <4 x i32> %4
1618}
1619
; test_pblendd_ymm: 256-bit dword blends written as shufflevector. The first
; blends %a0 with %a1; the second blends %a1 with an <8 x i32> loaded from %a2
; (align 32); the two blends are then added. For every check prefix the
; expected assembly is a register-form vpblendd, a vpblendd with a memory
; source, a vpaddd and retq, each with a "sched:" annotation.
1620define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
1621; GENERIC-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001622; GENERIC: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001623; GENERIC-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001624; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [5:0.50]
1625; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1626; GENERIC-NEXT: retq # sched: [1:1.00]
1627;
1628; HASWELL-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001629; HASWELL: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001630; HASWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001631; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001632; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001633; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001634;
Gadi Haber85d99b42017-10-17 13:45:39 +00001635; BROADWELL-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001636; BROADWELL: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001637; BROADWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001638; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001639; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001640; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001641;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001642; SKYLAKE-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001643; SKYLAKE: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001644; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001645; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1646; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1647; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001648;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001649; SKX-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001650; SKX: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001651; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00001652; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1653; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1654; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001655;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001656; ZNVER1-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001657; ZNVER1: # %bb.0:
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001658; ZNVER1-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001659; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50]
1660; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1661; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 0-7 select from the first shuffle operand, 8-15 from the second.
; %1 takes elements 0-2 and 7 from %a1 and elements 3-6 from %a0; %3 takes
; elements 1 and 2 from the loaded vector and the rest from %a1.
1662 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
1663 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001664 %3 = shufflevector <8 x i32> %a1, <8 x i32> %2, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
1665 %4 = add <8 x i32> %1, %3
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001666 ret <8 x i32> %4
1667}
1668
; test_pblendvb: calls the @llvm.x86.avx2.pblendvb intrinsic twice, first on
; (%a0, %a1, %a2), then on that result, a <32 x i8> loaded from %a3 (align 32)
; and %a4. For every check prefix the expected assembly is a register-form
; vpblendvb, a vpblendvb with a (%rdi) memory operand and retq, each with a
; "sched:" annotation.
1669define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) {
1670; GENERIC-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001671; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001672; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
1673; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
1674; GENERIC-NEXT: retq # sched: [1:1.00]
1675;
1676; HASWELL-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001677; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001678; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001679; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
1680; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001681;
Gadi Haber85d99b42017-10-17 13:45:39 +00001682; BROADWELL-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001683; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001684; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001685; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
1686; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001687;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001688; SKYLAKE-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001689; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001690; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001691; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1692; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001693;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001694; SKX-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001695; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001696; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
Gadi Haber684944b2017-10-08 12:52:54 +00001697; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1698; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001699;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001700; ZNVER1-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001701; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001702; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1703; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
1704; ZNVER1-NEXT: retq # sched: [1:0.50]
; The second call takes the loaded vector %2 as its middle argument; the check
; lines above expect it as the (%rdi) memory operand.
1705 %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2)
1706 %2 = load <32 x i8>, <32 x i8> *%a3, align 32
1707 %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4)
1708 ret <32 x i8> %3
1709}
; Declaration of the intrinsic called by test_pblendvb.
1710declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
1711
; test_pblendw: 256-bit word blends written as shufflevector. The first blends
; %a0 with %a1; the second blends %a1 with a <16 x i16> loaded from %a2
; (align 32); the two blends are then added. For every check prefix the
; expected assembly is a register-form vpblendw, a vpblendw with a memory
; source, a vpaddw and retq, each with a "sched:" annotation.
1712define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1713; GENERIC-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001714; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001715; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001716; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [5:0.50]
1717; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001718; GENERIC-NEXT: retq # sched: [1:1.00]
1719;
1720; HASWELL-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001721; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001722; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001723; HASWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00]
1724; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001725; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001726;
Gadi Haber85d99b42017-10-17 13:45:39 +00001727; BROADWELL-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001728; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001729; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001730; BROADWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [7:1.00]
1731; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001732; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001733;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001734; SKYLAKE-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001735; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001736; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001737; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00]
1738; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001739; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001740;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001741; SKX-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001742; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001743; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001744; SKX-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00]
1745; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00001746; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001747;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001748; ZNVER1-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001749; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001750; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33]
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001751; ZNVER1-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [9:0.50]
1752; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001753; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 0-15 select from the first shuffle operand, 16-31 from the second.
; %1 takes elements 2-4 and 10-12 from %a1 and the rest from %a0; %3 takes the
; even elements from the loaded vector and the odd elements from %a1.
1754 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 28, i32 13, i32 14, i32 15>
1755 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
Simon Pilgrim8fb1dd82018-02-03 21:20:19 +00001756 %3 = shufflevector <16 x i16> %a1, <16 x i16> %2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
1757 %4 = add <16 x i16> %1, %3
1758 ret <16 x i16> %4
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001759}
1760
; test_pbroadcastb: splats element 0 of %a0 and element 0 of the <16 x i8>
; loaded from %a1 across all 16 lanes (zeroinitializer shuffle masks), then
; adds the two splats. The autogenerated check lines pin, per RUN
; configuration, the register-form and memory-form vpbroadcastb plus vpaddb
; on xmm registers together with their -print-schedule "sched" annotations.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py.
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001761define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) {
1762; GENERIC-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001763; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001764; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
1765; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
1766; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1767; GENERIC-NEXT: retq # sched: [1:1.00]
1768;
1769; HASWELL-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001770; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001771; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001772; HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001773; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001774; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001775;
Gadi Haber85d99b42017-10-17 13:45:39 +00001776; BROADWELL-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001777; BROADWELL: # %bb.0:
Sanjoy Das81a4a022017-12-12 18:40:58 +00001778; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
Sanjoy Das1074eb22017-12-12 19:11:31 +00001779; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001780; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001781; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001782;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001783; SKYLAKE-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001784; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001785; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001786; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1787; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1788; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001789;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001790; SKX-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001791; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001792; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001793; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1794; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1795; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001796;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001797; ZNVER1-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001798; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001799; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00]
1800; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25]
1801; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1802; ZNVER1-NEXT: retq # sched: [1:0.50]
1803 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
1804 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
1805 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
1806 %4 = add <16 x i8> %1, %3
1807 ret <16 x i8> %4
1808}
1809
; test_pbroadcastb_ymm: 256-bit variant of the byte broadcast test. Splats
; element 0 of %a0 and element 0 of the <32 x i8> loaded from %a1 across all
; 32 lanes (zeroinitializer shuffle masks), then adds the two splats. The
; autogenerated check lines pin, per RUN configuration, vpbroadcastb with a
; ymm destination (register and memory source) followed by a ymm vpaddb,
; together with their -print-schedule "sched" annotations.
1810define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) {
1811; GENERIC-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001812; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001813; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
1814; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
1815; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1816; GENERIC-NEXT: retq # sched: [1:1.00]
1817;
1818; HASWELL-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001819; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001820; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001821; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001822; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001823; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001824;
Gadi Haber85d99b42017-10-17 13:45:39 +00001825; BROADWELL-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001826; BROADWELL: # %bb.0:
Sanjoy Das81a4a022017-12-12 18:40:58 +00001827; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
Sanjoy Das1074eb22017-12-12 19:11:31 +00001828; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001829; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001830; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001831;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001832; SKYLAKE-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001833; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001834; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001835; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1836; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1837; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001838;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001839; SKX-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001840; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001841; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001842; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1843; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1844; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001845;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001846; ZNVER1-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001847; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001848; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00]
1849; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25]
1850; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1851; ZNVER1-NEXT: retq # sched: [1:0.50]
1852 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer
1853 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
1854 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer
1855 %4 = add <32 x i8> %1, %3
1856 ret <32 x i8> %4
1857}
1858
; test_pbroadcastd: splats element 0 of %a0 and element 0 of the <4 x i32>
; loaded from %a1 across all 4 lanes (zeroinitializer shuffle masks), then
; adds the two splats. The autogenerated check lines pin the expected
; vpbroadcastd / vpaddd sequence and "sched" annotations per RUN
; configuration. Note that the -mcpu=skx expectations differ in shape: the
; memory splat is folded into the add as an embedded-broadcast operand
; "(%rdi){1to4}", so only one vpbroadcastd is expected there.
1859define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) {
1860; GENERIC-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001861; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001862; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1863; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
1864; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1865; GENERIC-NEXT: retq # sched: [1:1.00]
1866;
1867; HASWELL-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001868; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001869; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001870; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001871; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001872; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001873;
Gadi Haber85d99b42017-10-17 13:45:39 +00001874; BROADWELL-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001875; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001876; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001877; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001878; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001879; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001880;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001881; SKYLAKE-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001882; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001883; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001884; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
1885; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1886; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001887;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001888; SKX-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001889; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001890; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001891; SKX-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
1892; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001893;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001894; ZNVER1-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001895; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001896; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50]
1897; ZNVER1-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.25]
1898; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1899; ZNVER1-NEXT: retq # sched: [1:0.50]
1900 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
1901 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
1902 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
1903 %4 = add <4 x i32> %1, %3
1904 ret <4 x i32> %4
1905}
1906
; test_pbroadcastd_ymm: 256-bit variant of the dword broadcast test. Splats
; element 0 of %a0 and element 0 of the <8 x i32> loaded from %a1 across all
; 8 lanes (zeroinitializer shuffle masks), then adds the two splats. The
; autogenerated check lines pin the expected vpbroadcastd / vpaddd sequence
; on ymm registers and the "sched" annotations per RUN configuration. The
; -mcpu=skx expectations fold the memory splat into the add as the
; embedded-broadcast operand "(%rdi){1to8}".
1907define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) {
1908; GENERIC-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001909; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001910; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
1911; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
1912; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1913; GENERIC-NEXT: retq # sched: [1:1.00]
1914;
1915; HASWELL-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001916; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001917; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001918; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001919; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001920; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001921;
Gadi Haber85d99b42017-10-17 13:45:39 +00001922; BROADWELL-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001923; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001924; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001925; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001926; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001927; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001928;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001929; SKYLAKE-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001930; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001931; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001932; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
1933; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1934; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001935;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001936; SKX-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001937; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001938; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001939; SKX-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
1940; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001941;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001942; ZNVER1-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001943; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001944; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50]
1945; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25]
1946; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1947; ZNVER1-NEXT: retq # sched: [1:0.50]
1948 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
1949 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
1950 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer
1951 %4 = add <8 x i32> %1, %3
1952 ret <8 x i32> %4
1953}
1954
; test_pbroadcastq: splats element 0 of %a0 and element 0 of the <2 x i64>
; loaded from %a1 across both lanes (zeroinitializer shuffle masks), then
; adds the two splats. The autogenerated check lines pin the expected
; vpbroadcastq / vpaddq sequence on xmm registers and the "sched"
; annotations per RUN configuration. The -mcpu=skx expectations fold the
; memory splat into the add as the embedded-broadcast operand
; "(%rdi){1to2}".
1955define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) {
1956; GENERIC-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001957; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001958; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1959; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
1960; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1961; GENERIC-NEXT: retq # sched: [1:1.00]
1962;
1963; HASWELL-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001964; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001965; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001966; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001967; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001968; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001969;
Gadi Haber85d99b42017-10-17 13:45:39 +00001970; BROADWELL-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001971; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001972; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001973; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001974; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001975; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001976;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001977; SKYLAKE-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001978; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001979; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001980; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
1981; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1982; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001983;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001984; SKX-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001985; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001986; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001987; SKX-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0 # sched: [7:0.50]
1988; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001989;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001990; ZNVER1-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001991; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001992; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50]
1993; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25]
1994; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1995; ZNVER1-NEXT: retq # sched: [1:0.50]
1996 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
1997 %2 = load <2 x i64>, <2 x i64> *%a1, align 16
1998 %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
1999 %4 = add <2 x i64> %1, %3
2000 ret <2 x i64> %4
2001}
2002
; test_pbroadcastq_ymm: 256-bit variant of the qword broadcast test. Splats
; element 0 of %a0 and element 0 of the <4 x i64> loaded from %a1 across all
; 4 lanes (zeroinitializer shuffle masks), then adds the two splats. The
; autogenerated check lines pin the expected vpbroadcastq / vpaddq sequence
; on ymm registers and the "sched" annotations per RUN configuration. The
; -mcpu=skx expectations fold the memory splat into the add as the
; embedded-broadcast operand "(%rdi){1to4}".
2003define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) {
2004; GENERIC-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002005; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002006; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
2007; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
2008; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2009; GENERIC-NEXT: retq # sched: [1:1.00]
2010;
2011; HASWELL-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002012; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002013; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002014; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002015; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002016; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002017;
Gadi Haber85d99b42017-10-17 13:45:39 +00002018; BROADWELL-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002019; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002020; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002021; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00002022; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002023; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002024;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002025; SKYLAKE-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002026; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002027; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002028; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
2029; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2030; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002031;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002032; SKX-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002033; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002034; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002035; SKX-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0 # sched: [8:0.50]
2036; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002037;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002038; ZNVER1-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002039; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002040; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50]
2041; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25]
2042; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2043; ZNVER1-NEXT: retq # sched: [1:0.50]
2044 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
2045 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2046 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer
2047 %4 = add <4 x i64> %1, %3
2048 ret <4 x i64> %4
2049}
2050
; test_pbroadcastw: splats element 0 of %a0 and element 0 of the <8 x i16>
; loaded from %a1 across all 8 lanes (zeroinitializer shuffle masks), then
; adds the two splats. The autogenerated check lines pin, per RUN
; configuration, the register-form and memory-form vpbroadcastw plus vpaddw
; on xmm registers together with their -print-schedule "sched" annotations.
2051define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) {
2052; GENERIC-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002053; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002054; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
2055; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
2056; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2057; GENERIC-NEXT: retq # sched: [1:1.00]
2058;
2059; HASWELL-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002060; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002061; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002062; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002063; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002064; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002065;
Gadi Haber85d99b42017-10-17 13:45:39 +00002066; BROADWELL-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002067; BROADWELL: # %bb.0:
Sanjoy Das81a4a022017-12-12 18:40:58 +00002068; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
Sanjoy Das1074eb22017-12-12 19:11:31 +00002069; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002070; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002071; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002072;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002073; SKYLAKE-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002074; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002075; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002076; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2077; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2078; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002079;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002080; SKX-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002081; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002082; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002083; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2084; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2085; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002086;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002087; ZNVER1-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002088; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002089; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00]
2090; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25]
2091; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2092; ZNVER1-NEXT: retq # sched: [1:0.50]
2093 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
2094 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
2095 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
2096 %4 = add <8 x i16> %1, %3
2097 ret <8 x i16> %4
2098}
2099
; test_pbroadcastw_ymm: 256-bit variant of the word broadcast test. Splats
; element 0 of %a0 and element 0 of the <16 x i16> loaded from %a1 across all
; 16 lanes (zeroinitializer shuffle masks), then adds the two splats. The
; autogenerated check lines pin, per RUN configuration, vpbroadcastw with a
; ymm destination (register and memory source) followed by a ymm vpaddw,
; together with their -print-schedule "sched" annotations.
2100define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) {
2101; GENERIC-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002102; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002103; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
2104; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
2105; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2106; GENERIC-NEXT: retq # sched: [1:1.00]
2107;
2108; HASWELL-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002109; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002110; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002111; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002112; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002113; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002114;
Gadi Haber85d99b42017-10-17 13:45:39 +00002115; BROADWELL-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002116; BROADWELL: # %bb.0:
Sanjoy Das81a4a022017-12-12 18:40:58 +00002117; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
Sanjoy Das1074eb22017-12-12 19:11:31 +00002118; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002119; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002120; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002121;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002122; SKYLAKE-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002123; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002124; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002125; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2126; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2127; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002128;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002129; SKX-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002130; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002131; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002132; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2133; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2134; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002135;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002136; ZNVER1-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002137; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002138; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00]
2139; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25]
2140; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2141; ZNVER1-NEXT: retq # sched: [1:0.50]
2142 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
2143 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
2144 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer
2145 %4 = add <16 x i16> %1, %3
2146 ret <16 x i16> %4
2147}
2148
; test_pcmpeqb: compares %a0 with %a1 lane-wise for equality and sign-extends
; the <32 x i1> result back to <32 x i8>; that mask is then compared for
; equality with the <32 x i8> loaded from %a2 and sign-extended again. The
; autogenerated check lines pin, per RUN configuration, a register-form
; vpcmpeqb followed by a memory-form vpcmpeqb on ymm registers, together
; with their -print-schedule "sched" annotations.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002149define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2150; GENERIC-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002151; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002152; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2153; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2154; GENERIC-NEXT: retq # sched: [1:1.00]
2155;
2156; HASWELL-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002157; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002158; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002159; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2160; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002161;
Gadi Haber85d99b42017-10-17 13:45:39 +00002162; BROADWELL-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002163; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002164; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002165; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2166; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002167;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002168; SKYLAKE-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002169; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002170; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2171; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2172; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002173;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002174; SKX-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002175; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002176; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2177; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002178; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002179;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002180; ZNVER1-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002181; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002182; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2183; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2184; ZNVER1-NEXT: retq # sched: [1:0.50]
2185 %1 = icmp eq <32 x i8> %a0, %a1
2186 %2 = sext <32 x i1> %1 to <32 x i8>
2187 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
2188 %4 = icmp eq <32 x i8> %2, %3
2189 %5 = sext <32 x i1> %4 to <32 x i8>
2190 ret <32 x i8> %5
2191}
2192
; test_pcmpeqd: compares %a0 with %a1 lane-wise for equality and sign-extends
; the <8 x i1> result back to <8 x i32>; that mask is then compared for
; equality with the <8 x i32> loaded from %a2 and sign-extended again. The
; autogenerated check lines pin, per RUN configuration, a register-form
; vpcmpeqd followed by a memory-form vpcmpeqd on ymm registers, together
; with their -print-schedule "sched" annotations.
2193define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2194; GENERIC-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002195; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002196; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2197; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2198; GENERIC-NEXT: retq # sched: [1:1.00]
2199;
2200; HASWELL-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002201; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002202; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002203; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2204; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002205;
Gadi Haber85d99b42017-10-17 13:45:39 +00002206; BROADWELL-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002207; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002208; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002209; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2210; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002211;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002212; SKYLAKE-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002213; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002214; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2215; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2216; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002217;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002218; SKX-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002219; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002220; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2221; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002222; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002223;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002224; ZNVER1-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002225; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002226; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2227; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2228; ZNVER1-NEXT: retq # sched: [1:0.50]
2229 %1 = icmp eq <8 x i32> %a0, %a1
2230 %2 = sext <8 x i1> %1 to <8 x i32>
2231 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
2232 %4 = icmp eq <8 x i32> %2, %3
2233 %5 = sext <8 x i1> %4 to <8 x i32>
2234 ret <8 x i32> %5
2235}
2236
; test_pcmpeqq: pins the expected `llc -print-schedule` output for vpcmpeqq on
; <4 x i64> vectors, once with both operands in registers and once with the
; second operand read from memory through %a2. Every CPU prefix from the RUN
; lines (GENERIC, HASWELL, BROADWELL, SKYLAKE, SKX, ZNVER1) has its own set of
; expected `sched: [...]` annotations. The assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
2237define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2238; GENERIC-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002239; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002240; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2241; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2242; GENERIC-NEXT: retq # sched: [1:1.00]
2243;
2244; HASWELL-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002245; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002246; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002247; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2248; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002249;
Gadi Haber85d99b42017-10-17 13:45:39 +00002250; BROADWELL-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002251; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002252; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002253; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2254; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002255;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002256; SKYLAKE-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002257; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002258; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2259; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2260; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002261;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002262; SKX-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002263; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002264; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2265; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002266; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002267;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002268; ZNVER1-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002269; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002270; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2271; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2272; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register equality compare; the i1 mask is sign-extended back to i64 lanes.
2273 %1 = icmp eq <4 x i64> %a0, %a1
2274 %2 = sext <4 x i1> %1 to <4 x i64>
; The second compare takes its right-hand operand from the 32-byte-aligned load of %a2.
2275 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
2276 %4 = icmp eq <4 x i64> %2, %3
2277 %5 = sext <4 x i1> %4 to <4 x i64>
2278 ret <4 x i64> %5
2279}
2280
; test_pcmpeqw: pins the expected `llc -print-schedule` output for vpcmpeqw on
; <16 x i16> vectors, once with both operands in registers and once with the
; second operand read from memory through %a2. Every CPU prefix from the RUN
; lines (GENERIC, HASWELL, BROADWELL, SKYLAKE, SKX, ZNVER1) has its own set of
; expected `sched: [...]` annotations. The assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
2281define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2282; GENERIC-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002283; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002284; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2285; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2286; GENERIC-NEXT: retq # sched: [1:1.00]
2287;
2288; HASWELL-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002289; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002290; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002291; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2292; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002293;
Gadi Haber85d99b42017-10-17 13:45:39 +00002294; BROADWELL-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002295; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002296; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002297; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2298; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002299;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002300; SKYLAKE-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002301; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002302; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2303; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2304; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002305;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002306; SKX-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002307; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002308; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2309; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002310; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002311;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002312; ZNVER1-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002313; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002314; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2315; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2316; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register equality compare; the i1 mask is sign-extended back to i16 lanes.
2317 %1 = icmp eq <16 x i16> %a0, %a1
2318 %2 = sext <16 x i1> %1 to <16 x i16>
; The second compare takes its right-hand operand from the 32-byte-aligned load of %a2.
2319 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2320 %4 = icmp eq <16 x i16> %2, %3
2321 %5 = sext <16 x i1> %4 to <16 x i16>
2322 ret <16 x i16> %5
2323}
2324
; test_pcmpgtb: pins the expected `llc -print-schedule` output for vpcmpgtb on
; <32 x i8> vectors, once with both operands in registers and once with the
; second operand read from memory through %a2. Every CPU prefix from the RUN
; lines (GENERIC, HASWELL, BROADWELL, SKYLAKE, SKX, ZNVER1) has its own set of
; expected `sched: [...]` annotations. The assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
2325define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2326; GENERIC-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002327; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002328; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2329; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2330; GENERIC-NEXT: retq # sched: [1:1.00]
2331;
2332; HASWELL-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002333; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002334; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002335; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2336; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002337;
Gadi Haber85d99b42017-10-17 13:45:39 +00002338; BROADWELL-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002339; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002340; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002341; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2342; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002343;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002344; SKYLAKE-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002345; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002346; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2347; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2348; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002349;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002350; SKX-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002351; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002352; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2353; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002354; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002355;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002356; ZNVER1-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002357; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002358; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2359; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2360; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register signed greater-than; the i1 mask is sign-extended back to i8 lanes.
2361 %1 = icmp sgt <32 x i8> %a0, %a1
2362 %2 = sext <32 x i1> %1 to <32 x i8>
; The second compare takes its right-hand operand from the 32-byte-aligned load of %a2.
2363 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
2364 %4 = icmp sgt <32 x i8> %2, %3
2365 %5 = sext <32 x i1> %4 to <32 x i8>
2366 ret <32 x i8> %5
2367}
2368
; test_pcmpgtd: pins the expected `llc -print-schedule` output for vpcmpgtd on
; <8 x i32> vectors, once with both operands in registers and once with the
; second operand read from memory through %a2. Every CPU prefix from the RUN
; lines (GENERIC, HASWELL, BROADWELL, SKYLAKE, SKX, ZNVER1) has its own set of
; expected `sched: [...]` annotations. The assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
2369define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2370; GENERIC-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002371; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002372; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2373; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2374; GENERIC-NEXT: retq # sched: [1:1.00]
2375;
2376; HASWELL-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002377; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002378; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002379; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2380; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002381;
Gadi Haber85d99b42017-10-17 13:45:39 +00002382; BROADWELL-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002383; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002384; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002385; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2386; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002387;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002388; SKYLAKE-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002389; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002390; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2391; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2392; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002393;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002394; SKX-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002395; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002396; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2397; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002398; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002399;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002400; ZNVER1-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002401; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002402; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2403; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2404; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register signed greater-than; the i1 mask is sign-extended back to i32 lanes.
2405 %1 = icmp sgt <8 x i32> %a0, %a1
2406 %2 = sext <8 x i1> %1 to <8 x i32>
; The second compare takes its right-hand operand from the 32-byte-aligned load of %a2.
2407 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
2408 %4 = icmp sgt <8 x i32> %2, %3
2409 %5 = sext <8 x i1> %4 to <8 x i32>
2410 ret <8 x i32> %5
2411}
2412
; test_pcmpgtq: pins the expected `llc -print-schedule` output for vpcmpgtq on
; <4 x i64> vectors, once with both operands in registers and once with the
; second operand read from memory through %a2. Every CPU prefix from the RUN
; lines (GENERIC, HASWELL, BROADWELL, SKYLAKE, SKX, ZNVER1) has its own set of
; expected `sched: [...]` annotations. The assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
2413define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2414; GENERIC-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002415; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002416; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2417; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2418; GENERIC-NEXT: retq # sched: [1:1.00]
2419;
2420; HASWELL-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002421; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002422; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002423; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
2424; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002425;
Gadi Haber85d99b42017-10-17 13:45:39 +00002426; BROADWELL-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002427; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002428; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002429; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
2430; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002431;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002432; SKYLAKE-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002433; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002434; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002435; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2436; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002437;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002438; SKX-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002439; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002440; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2441; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002442; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002443;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002444; ZNVER1-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002445; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002446; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2447; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
2448; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register signed greater-than; the i1 mask is sign-extended back to i64 lanes.
2449 %1 = icmp sgt <4 x i64> %a0, %a1
2450 %2 = sext <4 x i1> %1 to <4 x i64>
; The second compare takes its right-hand operand from the 32-byte-aligned load of %a2.
2451 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
2452 %4 = icmp sgt <4 x i64> %2, %3
2453 %5 = sext <4 x i1> %4 to <4 x i64>
2454 ret <4 x i64> %5
2455}
2456
; test_pcmpgtw: pins the expected `llc -print-schedule` output for vpcmpgtw on
; <16 x i16> vectors, once with both operands in registers and once with the
; second operand read from memory through %a2. Every CPU prefix from the RUN
; lines (GENERIC, HASWELL, BROADWELL, SKYLAKE, SKX, ZNVER1) has its own set of
; expected `sched: [...]` annotations. The assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
2457define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2458; GENERIC-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002459; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002460; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2461; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2462; GENERIC-NEXT: retq # sched: [1:1.00]
2463;
2464; HASWELL-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002465; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002466; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002467; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2468; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002469;
Gadi Haber85d99b42017-10-17 13:45:39 +00002470; BROADWELL-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002471; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002472; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002473; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2474; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002475;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002476; SKYLAKE-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002477; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002478; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2479; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2480; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002481;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002482; SKX-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002483; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002484; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2485; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002486; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002487;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002488; ZNVER1-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002489; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002490; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2491; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2492; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register signed greater-than; the i1 mask is sign-extended back to i16 lanes.
2493 %1 = icmp sgt <16 x i16> %a0, %a1
2494 %2 = sext <16 x i1> %1 to <16 x i16>
; The second compare takes its right-hand operand from the 32-byte-aligned load of %a2.
2495 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2496 %4 = icmp sgt <16 x i16> %2, %3
2497 %5 = sext <16 x i1> %4 to <16 x i16>
2498 ret <16 x i16> %5
2499}
2500
; test_perm2i128: pins the expected `llc -print-schedule` output for vperm2i128,
; once shuffling two register operands and once with the second source read
; from memory through %a2. The two shuffle results are summed (vpaddq in the
; expected output) and returned. Every CPU prefix from the RUN lines (GENERIC,
; HASWELL, BROADWELL, SKYLAKE, SKX, ZNVER1) has its own set of expected
; `sched: [...]` annotations. The assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002501define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2502; GENERIC-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002503; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002504; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
2505; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00]
2506; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2507; GENERIC-NEXT: retq # sched: [1:1.00]
2508;
2509; HASWELL-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002510; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002511; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002512; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002513; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002514; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002515;
Gadi Haber85d99b42017-10-17 13:45:39 +00002516; BROADWELL-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002517; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002518; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002519; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002520; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002521; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002522;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002523; SKYLAKE-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002524; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002525; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002526; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2527; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2528; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002529;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002530; SKX-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002531; SKX: # %bb.0:
Craig Toppere5d44ce2017-11-04 18:10:03 +00002532; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2533; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002534; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2535; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002536;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002537; ZNVER1-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002538; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002539; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25]
2540; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50]
2541; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2542; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask <2,3,4,5> selects elements 2-3 of the first source followed by elements 0-1 of the second.
2543 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; Same mask again, but the second source is the 32-byte-aligned load of %a2.
2544 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
2545 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2546 %4 = add <4 x i64> %1, %3
2547 ret <4 x i64> %4
2548}
2549
; test_permd: pins the expected `llc -print-schedule` output for vpermd, once
; with the permuted data in a register (%a1) and once with it read from memory
; through %a2; %a0 is passed as the second intrinsic argument in both calls.
; The two results are summed (vpaddd in the expected output) and returned.
; Every CPU prefix from the RUN lines (GENERIC, HASWELL, BROADWELL, SKYLAKE,
; SKX, ZNVER1) has its own set of expected `sched: [...]` annotations. The
; assertions are autogenerated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
2550define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2551; GENERIC-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002552; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002553; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2554; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2555; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2556; GENERIC-NEXT: retq # sched: [1:1.00]
2557;
2558; HASWELL-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002559; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002560; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002561; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002562; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002563; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002564;
Gadi Haber85d99b42017-10-17 13:45:39 +00002565; BROADWELL-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002566; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002567; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002568; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002569; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002570; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002571;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002572; SKYLAKE-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002573; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002574; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002575; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2576; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2577; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002578;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002579; SKX-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002580; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002581; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002582; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2583; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2584; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002585;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002586; ZNVER1-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002587; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002588; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25]
2589; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
2590; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2591; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register form: both intrinsic arguments are function arguments.
2592 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
; Memory form: the first intrinsic argument is the 32-byte-aligned load of %a2.
2593 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
2594 %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0)
2595 %4 = add <8 x i32> %1, %3
2596 ret <8 x i32> %4
2597}
; Declaration of the AVX2 permd intrinsic called by test_permd above.
2598declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
2599
; test_permpd: pins the expected `llc -print-schedule` output for vpermpd, once
; permuting a register operand (mask 3,2,2,3) and once permuting a value read
; from memory through %a1 (mask 0,2,2,3). The two results are summed (vaddpd
; in the expected output) and returned. Every CPU prefix from the RUN lines
; (GENERIC, HASWELL, BROADWELL, SKYLAKE, SKX, ZNVER1) has its own set of
; expected `sched: [...]` annotations. The assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
; Unlike the other CPUs, the ZNVER1 expectations list the memory-form vpermpd
; before the register form.
; NOTE(review): the first ZNVER1 `sched` values for vpermpd (107 and 100) are
; far larger than on any other CPU here - presumably a placeholder in that
; scheduling model; confirm before relying on them.
2600define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
2601; GENERIC-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002602; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002603; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2604; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2605; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2606; GENERIC-NEXT: retq # sched: [1:1.00]
2607;
2608; HASWELL-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002609; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002610; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002611; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002612; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002613; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002614;
Gadi Haber85d99b42017-10-17 13:45:39 +00002615; BROADWELL-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002616; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002617; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002618; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002619; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002620; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002621;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002622; SKYLAKE-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002623; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002624; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002625; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002626; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002627; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002628;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002629; SKX-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002630; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002631; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002632; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2633; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2634; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002635;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002636; ZNVER1-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002637; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002638; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50]
2639; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25]
2640; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2641; ZNVER1-NEXT: retq # sched: [1:0.50]
; Single-source shuffle of the register argument (second shuffle operand is undef).
2642 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
; Single-source shuffle of the 32-byte-aligned load of %a1, using a different mask.
2643 %2 = load <4 x double>, <4 x double> *%a1, align 32
2644 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2645 %4 = fadd <4 x double> %1, %3
2646 ret <4 x double> %4
2647}
2648
; test_permps: pins the expected `llc -print-schedule` output for vpermps, once
; with the permuted data in a register (%a1) and once with it read from memory
; through %a2; %a0 is passed as the second intrinsic argument in both calls.
; The two results are summed (vaddps in the expected output) and returned.
; Every CPU prefix from the RUN lines (GENERIC, HASWELL, BROADWELL, SKYLAKE,
; SKX, ZNVER1) has its own set of expected `sched: [...]` annotations. The
; assertions are autogenerated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
; NOTE(review): the first ZNVER1 `sched` values for vpermps (100 and 107) are
; far larger than on any other CPU here - presumably a placeholder in that
; scheduling model; confirm before relying on them.
2649define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) {
2650; GENERIC-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002651; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002652; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2653; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2654; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2655; GENERIC-NEXT: retq # sched: [1:1.00]
2656;
2657; HASWELL-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002658; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002659; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002660; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002661; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002662; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002663;
Gadi Haber85d99b42017-10-17 13:45:39 +00002664; BROADWELL-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002665; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002666; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002667; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002668; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002669; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002670;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002671; SKYLAKE-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002672; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002673; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002674; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002675; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002676; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002677;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002678; SKX-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002679; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002680; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002681; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2682; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
2683; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002684;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002685; ZNVER1-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002686; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002687; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25]
2688; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50]
2689; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2690; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register form: both intrinsic arguments are function arguments.
2691 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
; Memory form: the first intrinsic argument is the 32-byte-aligned load of %a2.
2692 %2 = load <8 x float>, <8 x float> *%a2, align 32
2693 %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0)
2694 %4 = fadd <8 x float> %1, %3
2695 ret <8 x float> %4
2696}
; Declaration of the AVX2 permps intrinsic called by test_permps above.
2697declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
2698
; Scheduling test for vpermq in register and memory forms.
; %a0 is shuffled with mask <3,2,2,3>; the vector loaded from %a1 is shuffled
; with mask <0,2,2,3>, and the checks expect that load to appear as the mem
; operand of the second vpermq. The two shuffles are then added together
; (presumably so that neither result is dead).
; The znver1 checks expect the memory-form vpermq to be emitted first, unlike
; the other CPU variants.
2699define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
2700; GENERIC-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002701; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002702; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2703; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2704; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2705; GENERIC-NEXT: retq # sched: [1:1.00]
2706;
2707; HASWELL-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002708; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002709; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002710; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002711; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002712; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002713;
Gadi Haber85d99b42017-10-17 13:45:39 +00002714; BROADWELL-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002715; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002716; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002717; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002718; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002719; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002720;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002721; SKYLAKE-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002722; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002723; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002724; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2725; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2726; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002727;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002728; SKX-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002729; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002730; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002731; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2732; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2733; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002734;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002735; ZNVER1-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002736; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002737; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50]
2738; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25]
2739; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2740; ZNVER1-NEXT: retq # sched: [1:0.50]
2741 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
2742 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2743 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2744 %4 = add <4 x i64> %1, %3
2745 ret <4 x i64> %4
2746}
2747
; Scheduling test for vpgatherdd with xmm operands: a single call to
; @llvm.x86.avx2.gather.d.d whose result is returned directly.
; The trailing i8 2 argument appears as the scale 2 in the checked addressing
; mode (%rdi,%xmm1,2). Every CPU variant expects only the gather and retq.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002748define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
2749; GENERIC-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002750; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002751; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002752; GENERIC-NEXT: retq # sched: [1:1.00]
2753;
2754; HASWELL-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002755; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002756; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
2757; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002758;
Gadi Haber85d99b42017-10-17 13:45:39 +00002759; BROADWELL-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002760; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002761; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002762; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002763;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002764; SKYLAKE-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002765; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002766; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2767; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002768;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002769; SKX-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002770; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002771; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2772; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002773;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002774; ZNVER1-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002775; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002776; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2777; ZNVER1-NEXT: retq # sched: [1:0.50]
2778 %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2)
2779 ret <4 x i32> %1
2780}
2781declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly
2782
; Scheduling test for vpgatherdd with ymm operands: a single call to
; @llvm.x86.avx2.gather.d.d.256 on <8 x i32> vectors, result returned directly.
; The trailing i8 2 argument appears as the scale 2 in the checked addressing
; mode (%rdi,%ymm1,2). Every CPU variant expects only the gather and retq.
2783define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
2784; GENERIC-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002785; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002786; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002787; GENERIC-NEXT: retq # sched: [1:1.00]
2788;
2789; HASWELL-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002790; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002791; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [27:6.50]
2792; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002793;
Gadi Haber85d99b42017-10-17 13:45:39 +00002794; BROADWELL-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002795; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002796; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002797; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002798;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002799; SKYLAKE-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002800; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002801; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2802; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002803;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002804; SKX-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002805; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002806; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2807; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002808;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002809; ZNVER1-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002810; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002811; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
2812; ZNVER1-NEXT: retq # sched: [1:0.50]
2813 %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2)
2814 ret <8 x i32> %1
2815}
2816declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly
2817
; Scheduling test for vpgatherdq with xmm operands: a single call to
; @llvm.x86.avx2.gather.d.q with a <4 x i32> index (%a2) and <2 x i64>
; source/mask/result, returned directly.
; The trailing i8 2 argument appears as the scale 2 in the checked addressing
; mode (%rdi,%xmm1,2). Every CPU variant expects only the gather and retq.
2818define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
2819; GENERIC-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002820; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002821; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002822; GENERIC-NEXT: retq # sched: [1:1.00]
2823;
2824; HASWELL-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002825; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002826; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
2827; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002828;
Gadi Haber85d99b42017-10-17 13:45:39 +00002829; BROADWELL-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002830; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002831; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002832; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002833;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002834; SKYLAKE-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002835; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002836; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2837; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002838;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002839; SKX-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002840; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002841; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2842; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002843;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002844; ZNVER1-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002845; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002846; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2847; ZNVER1-NEXT: retq # sched: [1:0.50]
2848 %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2)
2849 ret <2 x i64> %1
2850}
2851declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly
2852
; Scheduling test for the 256-bit vpgatherdq: a single call to
; @llvm.x86.avx2.gather.d.q.256 with a <4 x i32> index (%a2, checked as xmm1)
; and <4 x i64> source/mask/result (checked as ymm registers).
; The trailing i8 2 argument appears as the scale 2 in the checked addressing
; mode (%rdi,%xmm1,2). Every CPU variant expects only the gather and retq.
2853define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
2854; GENERIC-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002855; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002856; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002857; GENERIC-NEXT: retq # sched: [1:1.00]
2858;
2859; HASWELL-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002860; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002861; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [27:4.00]
2862; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002863;
Gadi Haber85d99b42017-10-17 13:45:39 +00002864; BROADWELL-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002865; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002866; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002867; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002868;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002869; SKYLAKE-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002870; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002871; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2872; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002873;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002874; SKX-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002875; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002876; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2877; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002878;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002879; ZNVER1-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002880; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002881; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:?]
2882; ZNVER1-NEXT: retq # sched: [1:0.50]
2883 %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2)
2884 ret <4 x i64> %1
2885}
2886declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly
2887
; Scheduling test for vpgatherqd with xmm operands: a single call to
; @llvm.x86.avx2.gather.q.d with a <2 x i64> index (%a2) and <4 x i32>
; source/mask/result, returned directly.
; The trailing i8 2 argument appears as the scale 2 in the checked addressing
; mode (%rdi,%xmm1,2). Every CPU variant expects only the gather and retq.
2888define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
2889; GENERIC-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002890; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002891; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002892; GENERIC-NEXT: retq # sched: [1:1.00]
2893;
2894; HASWELL-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002895; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002896; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:5.00]
2897; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002898;
Gadi Haber85d99b42017-10-17 13:45:39 +00002899; BROADWELL-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002900; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002901; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002902; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002903;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002904; SKYLAKE-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002905; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002906; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2907; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002908;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002909; SKX-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002910; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002911; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2912; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002913;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002914; ZNVER1-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002915; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002916; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2917; ZNVER1-NEXT: retq # sched: [1:0.50]
2918 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2)
2919 ret <4 x i32> %1
2920}
2921declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly
2922
; Scheduling test for the 256-bit-index vpgatherqd: a single call to
; @llvm.x86.avx2.gather.q.d.256 with a <4 x i64> index (%a2, checked as ymm1)
; and <4 x i32> source/mask/result (checked as xmm registers).
; The trailing i8 2 argument appears as the scale 2 in the checked addressing
; mode (%rdi,%ymm1,2). Unlike the other gather tests in this file section,
; every CPU variant here also expects a vzeroupper between the gather and
; retq.
2923define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
2924; GENERIC-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002925; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002926; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim4ff43d82017-12-10 13:41:29 +00002927; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002928; GENERIC-NEXT: retq # sched: [1:1.00]
2929;
2930; HASWELL-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002931; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002932; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [28:5.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002933; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002934; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002935;
Gadi Haber85d99b42017-10-17 13:45:39 +00002936; BROADWELL-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002937; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002938; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00002939; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002940; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002941;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002942; SKYLAKE-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002943; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002944; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002945; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002946; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002947;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002948; SKX-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002949; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002950; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002951; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002952; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002953;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002954; ZNVER1-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002955; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002956; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:?]
2957; ZNVER1-NEXT: vzeroupper # sched: [100:?]
2958; ZNVER1-NEXT: retq # sched: [1:0.50]
2959 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2)
2960 ret <4 x i32> %1
2961}
2962declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly
2963
; Scheduling test for vpgatherqq with xmm operands: a single call to
; @llvm.x86.avx2.gather.q.q on <2 x i64> vectors, result returned directly.
; The trailing i8 2 argument appears as the scale 2 in the checked addressing
; mode (%rdi,%xmm1,2). Every CPU variant expects only the gather and retq.
2964define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
2965; GENERIC-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002966; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002967; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002968; GENERIC-NEXT: retq # sched: [1:1.00]
2969;
2970; HASWELL-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002971; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002972; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33]
2973; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002974;
Gadi Haber85d99b42017-10-17 13:45:39 +00002975; BROADWELL-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002976; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002977; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002978; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002979;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002980; SKYLAKE-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002981; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002982; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2983; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002984;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002985; SKX-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002986; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002987; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2988; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002989;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002990; ZNVER1-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002991; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002992; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2993; ZNVER1-NEXT: retq # sched: [1:0.50]
2994 %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
2995 ret <2 x i64> %1
2996}
2997declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly
2998
; Scheduling test for vpgatherqq with ymm operands: a single call to
; @llvm.x86.avx2.gather.q.q.256 on <4 x i64> vectors, result returned directly.
; The trailing i8 2 argument appears as the scale 2 in the checked addressing
; mode (%rdi,%ymm1,2). Every CPU variant expects only the gather and retq.
2999define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
3000; GENERIC-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003001; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00003002; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003003; GENERIC-NEXT: retq # sched: [1:1.00]
3004;
3005; HASWELL-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003006; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00003007; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [24:5.00]
3008; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003009;
Gadi Haber85d99b42017-10-17 13:45:39 +00003010; BROADWELL-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003011; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00003012; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003013; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003014;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003015; SKYLAKE-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003016; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003017; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3018; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003019;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003020; SKX-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003021; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003022; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3023; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003024;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003025; ZNVER1-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003026; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003027; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
3028; ZNVER1-NEXT: retq # sched: [1:0.50]
3029 %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
3030 ret <4 x i64> %1
3031}
3032declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly
3033
; Scheduling test for vphaddd in register and memory forms.
; The first @llvm.x86.avx2.phadd.d call combines %a0 and %a1; its result is
; fed, together with the vector loaded from %a2, into a second call. The
; checks expect that load to appear as the (%rdi) memory operand of the
; second vphaddd rather than as a separate load instruction.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003034define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3035; GENERIC-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003036; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003037; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3038; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3039; GENERIC-NEXT: retq # sched: [1:1.00]
3040;
3041; HASWELL-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003042; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003043; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003044; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3045; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003046;
Gadi Haber85d99b42017-10-17 13:45:39 +00003047; BROADWELL-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003048; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003049; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003050; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3051; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003052;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003053; SKYLAKE-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003054; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003055; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003056; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3057; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003058;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003059; SKX-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003060; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003061; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003062; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3063; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003064;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003065; ZNVER1-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003066; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003067; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:?]
3068; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:?]
3069; ZNVER1-NEXT: retq # sched: [1:0.50]
3070 %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
3071 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3072 %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2)
3073 ret <8 x i32> %3
3074}
3075declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
3076
; Scheduling test for vphaddsw in register and memory forms.
; The first @llvm.x86.avx2.phadd.sw call combines %a0 and %a1; its result is
; fed, together with the vector loaded from %a2, into a second call. The
; checks expect that load to appear as the (%rdi) memory operand of the
; second vphaddsw rather than as a separate load instruction.
3077define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3078; GENERIC-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003079; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003080; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3081; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3082; GENERIC-NEXT: retq # sched: [1:1.00]
3083;
3084; HASWELL-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003085; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003086; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003087; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3088; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003089;
Gadi Haber85d99b42017-10-17 13:45:39 +00003090; BROADWELL-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003091; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003092; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003093; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3094; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003095;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003096; SKYLAKE-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003097; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003098; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003099; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3100; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003101;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003102; SKX-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003103; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003104; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003105; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3106; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003107;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003108; ZNVER1-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003109; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003110; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3111; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3112; ZNVER1-NEXT: retq # sched: [1:0.50]
3113 %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
3114 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3115 %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2)
3116 ret <16 x i16> %3
3117}
3118declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
3119
; Scheduling test for vphaddw in register and memory forms.
; The first @llvm.x86.avx2.phadd.w call combines %a0 and %a1; its result is
; fed, together with the vector loaded from %a2, into a second call. The
; checks expect that load to appear as the (%rdi) memory operand of the
; second vphaddw rather than as a separate load instruction.
3120define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3121; GENERIC-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003122; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003123; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3124; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3125; GENERIC-NEXT: retq # sched: [1:1.00]
3126;
3127; HASWELL-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003128; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003129; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003130; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3131; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003132;
Gadi Haber85d99b42017-10-17 13:45:39 +00003133; BROADWELL-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003134; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003135; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003136; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3137; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003138;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003139; SKYLAKE-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003140; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003141; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003142; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3143; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003144;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003145; SKX-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003146; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003147; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003148; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3149; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003150;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003151; ZNVER1-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003152; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003153; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3154; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3155; ZNVER1-NEXT: retq # sched: [1:0.50]
3156 %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
3157 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3158 %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2)
3159 ret <16 x i16> %3
3160}
3161declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
3162
3163define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3164; GENERIC-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003165; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003166; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3167; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3168; GENERIC-NEXT: retq # sched: [1:1.00]
3169;
3170; HASWELL-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003171; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003172; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003173; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3174; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003175;
Gadi Haber85d99b42017-10-17 13:45:39 +00003176; BROADWELL-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003177; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003178; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003179; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3180; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003181;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003182; SKYLAKE-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003183; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003184; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003185; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3186; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003187;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003188; SKX-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003189; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003190; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003191; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3192; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003193;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003194; ZNVER1-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003195; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003196; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:?]
3197; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:?]
3198; ZNVER1-NEXT: retq # sched: [1:0.50]
3199 %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
3200 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3201 %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2)
3202 ret <8 x i32> %3
3203}
3204declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
3205
3206define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3207; GENERIC-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003208; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003209; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3210; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3211; GENERIC-NEXT: retq # sched: [1:1.00]
3212;
3213; HASWELL-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003214; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003215; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003216; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3217; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003218;
Gadi Haber85d99b42017-10-17 13:45:39 +00003219; BROADWELL-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003220; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003221; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003222; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3223; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003224;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003225; SKYLAKE-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003226; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003227; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003228; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3229; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003230;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003231; SKX-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003232; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003233; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003234; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3235; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003236;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003237; ZNVER1-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003238; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003239; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3240; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3241; ZNVER1-NEXT: retq # sched: [1:0.50]
3242 %1 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
3243 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3244 %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2)
3245 ret <16 x i16> %3
3246}
3247declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
3248
3249define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3250; GENERIC-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003251; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003252; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3253; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3254; GENERIC-NEXT: retq # sched: [1:1.00]
3255;
3256; HASWELL-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003257; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003258; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003259; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3260; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003261;
Gadi Haber85d99b42017-10-17 13:45:39 +00003262; BROADWELL-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003263; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003264; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003265; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3266; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003267;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003268; SKYLAKE-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003269; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003270; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003271; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3272; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003273;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003274; SKX-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003275; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003276; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003277; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3278; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003279;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003280; ZNVER1-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003281; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003282; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3283; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3284; ZNVER1-NEXT: retq # sched: [1:0.50]
3285 %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
3286 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3287 %3 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2)
3288 ret <16 x i16> %3
3289}
3290declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
3291
3292define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3293; GENERIC-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003294; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003295; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3296; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
3297; GENERIC-NEXT: retq # sched: [1:1.00]
3298;
3299; HASWELL-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003300; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003301; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003302; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3303; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003304;
Gadi Haber85d99b42017-10-17 13:45:39 +00003305; BROADWELL-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003306; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003307; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003308; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3309; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003310;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003311; SKYLAKE-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003312; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003313; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003314; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3315; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003316;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003317; SKX-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003318; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003319; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003320; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3321; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003322;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003323; ZNVER1-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003324; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003325; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3326; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3327; ZNVER1-NEXT: retq # sched: [1:0.50]
3328 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
3329 %2 = bitcast <16 x i16> %1 to <32 x i8>
3330 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
3331 %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x i8> %3)
3332 ret <16 x i16> %4
3333}
3334declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
3335
3336define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3337; GENERIC-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003338; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003339; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3340; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
3341; GENERIC-NEXT: retq # sched: [1:1.00]
3342;
3343; HASWELL-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003344; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003345; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003346; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3347; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003348;
Gadi Haber85d99b42017-10-17 13:45:39 +00003349; BROADWELL-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003350; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003351; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003352; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3353; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003354;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003355; SKYLAKE-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003356; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003357; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003358; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3359; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003360;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003361; SKX-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003362; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003363; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003364; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3365; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003366;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003367; ZNVER1-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003368; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003369; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3370; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3371; ZNVER1-NEXT: retq # sched: [1:0.50]
3372 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
3373 %2 = bitcast <8 x i32> %1 to <16 x i16>
3374 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
3375 %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3)
3376 ret <8 x i32> %4
3377}
3378declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
3379
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003380define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
3381; GENERIC-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003382; GENERIC: # %bb.0:
Simon Pilgrim809c0242017-12-06 18:24:48 +00003383; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [4:0.50]
3384; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003385; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
3386; GENERIC-NEXT: retq # sched: [1:1.00]
3387;
3388; HASWELL-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003389; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00003390; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
3391; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003392; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003393; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003394;
Gadi Haber85d99b42017-10-17 13:45:39 +00003395; BROADWELL-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003396; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003397; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3398; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003399; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00003400; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003401;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003402; SKYLAKE-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003403; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003404; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3405; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003406; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003407; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003408;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003409; SKX-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003410; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003411; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3412; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003413; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003414; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003415;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003416; ZNVER1-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003417; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003418; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:?]
3419; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:?]
3420; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3421; ZNVER1-NEXT: retq # sched: [1:0.50]
3422 %1 = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1)
3423 call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
3424 ret <4 x i32> %1
3425}
3426declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
3427declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
3428
3429define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
3430; GENERIC-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003431; GENERIC: # %bb.0:
Simon Pilgrim809c0242017-12-06 18:24:48 +00003432; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [4:0.50]
3433; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003434; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3435; GENERIC-NEXT: retq # sched: [1:1.00]
3436;
3437; HASWELL-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003438; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00003439; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
3440; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003441; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003442; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003443;
Gadi Haber85d99b42017-10-17 13:45:39 +00003444; BROADWELL-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003445; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003446; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3447; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003448; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00003449; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003450;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003451; SKYLAKE-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003452; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003453; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3454; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003455; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003456; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003457;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003458; SKX-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003459; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003460; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3461; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003462; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003463; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003464;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003465; ZNVER1-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003466; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003467; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:?]
3468; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:?]
3469; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3470; ZNVER1-NEXT: retq # sched: [1:0.50]
3471 %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1)
3472 call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
3473 ret <8 x i32> %1
3474}
3475declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
3476declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
3477
3478define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
3479; GENERIC-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003480; GENERIC: # %bb.0:
Simon Pilgrim809c0242017-12-06 18:24:48 +00003481; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [4:0.50]
3482; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003483; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
3484; GENERIC-NEXT: retq # sched: [1:1.00]
3485;
3486; HASWELL-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003487; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00003488; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
3489; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003490; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003491; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003492;
Gadi Haber85d99b42017-10-17 13:45:39 +00003493; BROADWELL-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003494; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003495; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3496; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003497; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00003498; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003499;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003500; SKYLAKE-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003501; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003502; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3503; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003504; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003505; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003506;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003507; SKX-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003508; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003509; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3510; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003511; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003512; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003513;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003514; ZNVER1-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003515; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003516; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
3517; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:?]
3518; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3519; ZNVER1-NEXT: retq # sched: [1:0.50]
3520 %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1)
3521 call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
3522 ret <2 x i64> %1
3523}
3524declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
3525declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
3526
3527define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
3528; GENERIC-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003529; GENERIC: # %bb.0:
Simon Pilgrim809c0242017-12-06 18:24:48 +00003530; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [4:0.50]
3531; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003532; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3533; GENERIC-NEXT: retq # sched: [1:1.00]
3534;
3535; HASWELL-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003536; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00003537; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
3538; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003539; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003540; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003541;
Gadi Haber85d99b42017-10-17 13:45:39 +00003542; BROADWELL-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003543; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003544; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3545; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003546; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00003547; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003548;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003549; SKYLAKE-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003550; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003551; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3552; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003553; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003554; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003555;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003556; SKX-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003557; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003558; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3559; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003560; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003561; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003562;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003563; ZNVER1-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003564; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003565; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50]
3566; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:?]
3567; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3568; ZNVER1-NEXT: retq # sched: [1:0.50]
3569 %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1)
3570 call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
3571 ret <4 x i64> %1
3572}
3573declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
3574declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
3575
; test_pmaxsb - calls @llvm.x86.avx2.pmaxs.b twice on <32 x i8> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpmaxsb followed
; by a vpmaxsb whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003576define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3577; GENERIC-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003578; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003579; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3580; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3581; GENERIC-NEXT: retq # sched: [1:1.00]
3582;
3583; HASWELL-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003584; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003585; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003586; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3587; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003588;
Gadi Haber85d99b42017-10-17 13:45:39 +00003589; BROADWELL-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003590; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003591; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003592; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3593; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003594;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003595; SKYLAKE-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003596; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003597; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3598; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3599; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003600;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003601; SKX-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003602; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003603; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3604; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3605; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003606;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003607; ZNVER1-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003608; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003609; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3610; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3611; ZNVER1-NEXT: retq # sched: [1:0.50]
3612 %1 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
3613 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3614 %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2)
3615 ret <32 x i8> %3
3616}
3617declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
3618
; test_pmaxsd - calls @llvm.x86.avx2.pmaxs.d twice on <8 x i32> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpmaxsd followed
; by a vpmaxsd whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
3619define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3620; GENERIC-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003621; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003622; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3623; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3624; GENERIC-NEXT: retq # sched: [1:1.00]
3625;
3626; HASWELL-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003627; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003628; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003629; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3630; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003631;
Gadi Haber85d99b42017-10-17 13:45:39 +00003632; BROADWELL-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003633; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003634; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003635; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3636; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003637;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003638; SKYLAKE-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003639; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003640; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3641; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3642; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003643;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003644; SKX-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003645; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003646; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3647; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3648; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003649;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003650; ZNVER1-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003651; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003652; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3653; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3654; ZNVER1-NEXT: retq # sched: [1:0.50]
3655 %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
3656 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3657 %3 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2)
3658 ret <8 x i32> %3
3659}
3660declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
3661
; test_pmaxsw - calls @llvm.x86.avx2.pmaxs.w twice on <16 x i16> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpmaxsw followed
; by a vpmaxsw whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
3662define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3663; GENERIC-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003664; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003665; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3666; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3667; GENERIC-NEXT: retq # sched: [1:1.00]
3668;
3669; HASWELL-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003670; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003671; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003672; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3673; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003674;
Gadi Haber85d99b42017-10-17 13:45:39 +00003675; BROADWELL-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003676; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003677; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003678; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3679; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003680;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003681; SKYLAKE-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003682; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003683; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3684; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3685; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003686;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003687; SKX-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003688; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003689; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3690; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3691; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003692;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003693; ZNVER1-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003694; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003695; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3696; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3697; ZNVER1-NEXT: retq # sched: [1:0.50]
3698 %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
3699 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3700 %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2)
3701 ret <16 x i16> %3
3702}
3703declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
3704
; test_pmaxub - calls @llvm.x86.avx2.pmaxu.b twice on <32 x i8> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpmaxub followed
; by a vpmaxub whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
3705define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3706; GENERIC-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003707; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003708; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3709; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3710; GENERIC-NEXT: retq # sched: [1:1.00]
3711;
3712; HASWELL-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003713; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003714; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003715; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3716; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003717;
Gadi Haber85d99b42017-10-17 13:45:39 +00003718; BROADWELL-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003719; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003720; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003721; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3722; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003723;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003724; SKYLAKE-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003725; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003726; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3727; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3728; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003729;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003730; SKX-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003731; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003732; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3733; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3734; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003735;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003736; ZNVER1-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003737; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003738; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3739; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3740; ZNVER1-NEXT: retq # sched: [1:0.50]
3741 %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
3742 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3743 %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2)
3744 ret <32 x i8> %3
3745}
3746declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
3747
; test_pmaxud - calls @llvm.x86.avx2.pmaxu.d twice on <8 x i32> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpmaxud followed
; by a vpmaxud whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
3748define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3749; GENERIC-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003750; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003751; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3752; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3753; GENERIC-NEXT: retq # sched: [1:1.00]
3754;
3755; HASWELL-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003756; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003757; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003758; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3759; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003760;
Gadi Haber85d99b42017-10-17 13:45:39 +00003761; BROADWELL-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003762; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003763; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003764; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3765; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003766;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003767; SKYLAKE-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003768; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003769; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3770; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3771; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003772;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003773; SKX-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003774; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003775; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3776; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3777; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003778;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003779; ZNVER1-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003780; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003781; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3782; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3783; ZNVER1-NEXT: retq # sched: [1:0.50]
3784 %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
3785 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3786 %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2)
3787 ret <8 x i32> %3
3788}
3789declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
3790
; test_pmaxuw - calls @llvm.x86.avx2.pmaxu.w twice on <16 x i16> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpmaxuw followed
; by a vpmaxuw whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
3791define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3792; GENERIC-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003793; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003794; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3795; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3796; GENERIC-NEXT: retq # sched: [1:1.00]
3797;
3798; HASWELL-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003799; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003800; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003801; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3802; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003803;
Gadi Haber85d99b42017-10-17 13:45:39 +00003804; BROADWELL-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003805; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003806; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003807; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3808; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003809;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003810; SKYLAKE-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003811; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003812; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3813; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3814; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003815;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003816; SKX-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003817; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003818; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3819; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3820; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003821;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003822; ZNVER1-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003823; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003824; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3825; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3826; ZNVER1-NEXT: retq # sched: [1:0.50]
3827 %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
3828 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3829 %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2)
3830 ret <16 x i16> %3
3831}
3832declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
3833
; test_pminsb - calls @llvm.x86.avx2.pmins.b twice on <32 x i8> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpminsb followed
; by a vpminsb whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
3834define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3835; GENERIC-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003836; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003837; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3838; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3839; GENERIC-NEXT: retq # sched: [1:1.00]
3840;
3841; HASWELL-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003842; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003843; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003844; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3845; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003846;
Gadi Haber85d99b42017-10-17 13:45:39 +00003847; BROADWELL-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003848; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003849; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003850; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3851; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003852;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003853; SKYLAKE-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003854; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003855; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3856; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3857; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003858;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003859; SKX-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003860; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003861; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3862; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3863; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003864;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003865; ZNVER1-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003866; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003867; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3868; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3869; ZNVER1-NEXT: retq # sched: [1:0.50]
3870 %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
3871 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3872 %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2)
3873 ret <32 x i8> %3
3874}
3875declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
3876
; test_pminsd - calls @llvm.x86.avx2.pmins.d twice on <8 x i32> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpminsd followed
; by a vpminsd whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
3877define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3878; GENERIC-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003879; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003880; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3881; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3882; GENERIC-NEXT: retq # sched: [1:1.00]
3883;
3884; HASWELL-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003885; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003886; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003887; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3888; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003889;
Gadi Haber85d99b42017-10-17 13:45:39 +00003890; BROADWELL-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003891; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003892; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003893; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3894; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003895;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003896; SKYLAKE-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003897; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003898; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3899; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3900; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003901;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003902; SKX-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003903; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003904; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3905; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3906; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003907;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003908; ZNVER1-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003909; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003910; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3911; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3912; ZNVER1-NEXT: retq # sched: [1:0.50]
3913 %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
3914 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3915 %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2)
3916 ret <8 x i32> %3
3917}
3918declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
3919
; test_pminsw - calls @llvm.x86.avx2.pmins.w twice on <16 x i16> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpminsw followed
; by a vpminsw whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
3920define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3921; GENERIC-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003922; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003923; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3924; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3925; GENERIC-NEXT: retq # sched: [1:1.00]
3926;
3927; HASWELL-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003928; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003929; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003930; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3931; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003932;
Gadi Haber85d99b42017-10-17 13:45:39 +00003933; BROADWELL-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003934; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003935; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003936; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3937; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003938;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003939; SKYLAKE-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003940; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003941; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3942; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3943; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003944;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003945; SKX-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003946; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003947; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3948; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3949; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003950;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003951; ZNVER1-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003952; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003953; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3954; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3955; ZNVER1-NEXT: retq # sched: [1:0.50]
3956 %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
3957 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3958 %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2)
3959 ret <16 x i16> %3
3960}
3961declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
3962
; test_pminub - calls @llvm.x86.avx2.pminu.b twice on <32 x i8> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpminub followed
; by a vpminub whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
3963define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3964; GENERIC-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003965; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003966; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3967; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3968; GENERIC-NEXT: retq # sched: [1:1.00]
3969;
3970; HASWELL-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003971; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003972; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003973; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3974; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003975;
Gadi Haber85d99b42017-10-17 13:45:39 +00003976; BROADWELL-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003977; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003978; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003979; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3980; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003981;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003982; SKYLAKE-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003983; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003984; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3985; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3986; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003987;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003988; SKX-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003989; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003990; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3991; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3992; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003993;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003994; ZNVER1-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003995; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003996; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3997; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3998; ZNVER1-NEXT: retq # sched: [1:0.50]
3999 %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
4000 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
4001 %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2)
4002 ret <32 x i8> %3
4003}
4004declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
4005
; test_pminud - calls @llvm.x86.avx2.pminu.d twice on <8 x i32> values, first on
; the two vector arguments and then on that result and a vector loaded from %a2
; (align 32). The assertions below expect a register-register vpminud followed
; by a vpminud whose source is the memory operand (%rdi), each carrying the
; "# sched" annotation emitted for the CPU under test.
4006define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4007; GENERIC-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004008; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004009; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4010; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
4011; GENERIC-NEXT: retq # sched: [1:1.00]
4012;
4013; HASWELL-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004014; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004015; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004016; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4017; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004018;
Gadi Haber85d99b42017-10-17 13:45:39 +00004019; BROADWELL-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004020; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004021; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004022; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4023; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004024;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004025; SKYLAKE-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004026; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004027; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4028; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4029; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004030;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004031; SKX-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004032; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00004033; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4034; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4035; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004036;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004037; ZNVER1-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004038; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004039; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4040; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4041; ZNVER1-NEXT: retq # sched: [1:0.50]
4042 %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
4043 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4044 %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2)
4045 ret <8 x i32> %3
4046}
4047declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
4048
4049define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4050; GENERIC-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004051; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004052; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4053; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
4054; GENERIC-NEXT: retq # sched: [1:1.00]
4055;
4056; HASWELL-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004057; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004058; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004059; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4060; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004061;
Gadi Haber85d99b42017-10-17 13:45:39 +00004062; BROADWELL-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004063; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004064; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004065; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4066; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004067;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004068; SKYLAKE-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004069; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004070; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4071; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4072; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004073;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004074; SKX-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004075; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00004076; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4077; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4078; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004079;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004080; ZNVER1-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004081; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004082; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4083; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4084; ZNVER1-NEXT: retq # sched: [1:0.50]
4085 %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
4086 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4087 %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2)
4088 ret <16 x i16> %3
4089}
; Declaration of the @llvm.x86.avx2.pminu.w intrinsic (two <16 x i16> operands,
; <16 x i16> result) that the two calls in the preceding function body refer to.
4090declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
4091
; test_pmovmskb: scheduling-annotation test for vpmovmskb (ymm source, eax result).
; The IR is a single call to @llvm.x86.avx2.pmovmskb on %a0 whose i32 result is
; returned. For each CPU selected by the RUN lines at the top of the file, the
; expected output is vpmovmskb, then vzeroupper, then retq, each with the
; "sched: [a:b]" annotation emitted by -print-schedule.
; NOTE(review): the ZNVER1 vzeroupper expectation is "[100:?]", i.e. no numeric
; second field is printed there - presumably missing from that model; confirm.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004092define i32 @test_pmovmskb(<32 x i8> %a0) {
4093; GENERIC-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004094; GENERIC: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004095; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [1:1.00]
Simon Pilgrim4ff43d82017-12-10 13:41:29 +00004096; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004097; GENERIC-NEXT: retq # sched: [1:1.00]
4098;
4099; HASWELL-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004100; HASWELL: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004101; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4102; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004103; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004104;
Gadi Haber85d99b42017-10-17 13:45:39 +00004105; BROADWELL-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004106; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004107; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4108; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004109; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004110;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004111; SKYLAKE-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004112; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004113; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004114; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004115; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004116;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004117; SKX-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004118; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004119; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4120; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004121; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004122;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004123; ZNVER1-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004124; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004125; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4126; ZNVER1-NEXT: vzeroupper # sched: [100:?]
4127; ZNVER1-NEXT: retq # sched: [1:0.50]
4128 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
4129 ret i32 %1
4130}
4131declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
4132
; test_pmovsxbd: scheduling-annotation test for vpmovsxbd in its register form
; (xmm0 source) and its memory form ((%rdi) source).
; The IR takes elements 0-7 of %a0 and of the <16 x i8> loaded from %a1,
; sign-extends each to <8 x i32>, and adds the two results (the vpaddd in the
; expected output). Each CPU selected by the RUN lines at the top of the file
; has its own expected "sched: [a:b]" annotations.
; The ZNVER1 expectations list the memory form before the register form; the
; other CPUs list the register form first.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004133define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4134; GENERIC-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004135; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004136; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
4137; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [5:1.00]
4138; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4139; GENERIC-NEXT: retq # sched: [1:1.00]
4140;
4141; HASWELL-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004142; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004143; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004144; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004145; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004146; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004147;
Gadi Haber85d99b42017-10-17 13:45:39 +00004148; BROADWELL-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004149; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004150; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004151; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004152; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004153; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004154;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004155; SKYLAKE-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004156; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004157; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004158; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4159; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4160; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004161;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004162; SKX-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004163; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004164; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004165; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4166; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4167; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004168;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004169; ZNVER1-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004170; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004171; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50]
4172; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.25]
4173; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4174; ZNVER1-NEXT: retq # sched: [1:0.50]
4175 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4176 %2 = sext <8 x i8> %1 to <8 x i32>
4177 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4178 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4179 %5 = sext <8 x i8> %4 to <8 x i32>
4180 %6 = add <8 x i32> %2, %5
4181 ret <8 x i32> %6
4182}
4183
; test_pmovsxbq: scheduling-annotation test for vpmovsxbq in its register form
; (xmm0 source) and its memory form ((%rdi) source).
; The IR takes elements 0-3 of %a0 and of the <16 x i8> loaded from %a1,
; sign-extends each to <4 x i64>, and adds the two results (the vpaddq in the
; expected output). Each CPU selected by the RUN lines at the top of the file
; has its own expected "sched: [a:b]" annotations.
; The ZNVER1 expectations list the memory form before the register form; the
; other CPUs list the register form first.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4184define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4185; GENERIC-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004186; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004187; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
4188; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [5:1.00]
4189; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4190; GENERIC-NEXT: retq # sched: [1:1.00]
4191;
4192; HASWELL-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004193; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004194; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004195; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004196; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004197; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004198;
Gadi Haber85d99b42017-10-17 13:45:39 +00004199; BROADWELL-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004200; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004201; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004202; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004203; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004204; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004205;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004206; SKYLAKE-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004207; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004208; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004209; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4210; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4211; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004212;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004213; SKX-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004214; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004215; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004216; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4217; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4218; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004219;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004220; ZNVER1-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004221; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004222; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50]
4223; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50]
4224; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4225; ZNVER1-NEXT: retq # sched: [1:0.50]
4226 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4227 %2 = sext <4 x i8> %1 to <4 x i64>
4228 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4229 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4230 %5 = sext <4 x i8> %4 to <4 x i64>
4231 %6 = add <4 x i64> %2, %5
4232 ret <4 x i64> %6
4233}
4234
; test_pmovsxbw: scheduling-annotation test for vpmovsxbw in its register form
; (xmm0 source) and its memory form ((%rdi) source).
; The IR sign-extends all of %a0 and all of the <16 x i8> loaded from %a1 to
; <16 x i16> and adds the two results (the vpaddw in the expected output). Each
; CPU selected by the RUN lines at the top of the file has its own expected
; "sched: [a:b]" annotations.
; The ZNVER1 expectations list the memory form before the register form; the
; other CPUs list the register form first.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4235define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4236; GENERIC-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004237; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004238; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
4239; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [5:1.00]
4240; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4241; GENERIC-NEXT: retq # sched: [1:1.00]
4242;
4243; HASWELL-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004244; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004245; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004246; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004247; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004248; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004249;
Gadi Haber85d99b42017-10-17 13:45:39 +00004250; BROADWELL-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004251; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004252; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004253; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004254; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004255; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004256;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004257; SKYLAKE-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004258; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004259; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004260; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4261; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4262; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004263;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004264; SKX-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004265; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004266; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004267; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4268; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4269; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004270;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004271; ZNVER1-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004272; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004273; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50]
4274; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50]
4275; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4276; ZNVER1-NEXT: retq # sched: [1:0.50]
4277 %1 = sext <16 x i8> %a0 to <16 x i16>
4278 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4279 %3 = sext <16 x i8> %2 to <16 x i16>
4280 %4 = add <16 x i16> %1, %3
4281 ret <16 x i16> %4
4282}
4283
; test_pmovsxdq: scheduling-annotation test for vpmovsxdq in its register form
; (xmm0 source) and its memory form ((%rdi) source).
; The IR sign-extends %a0 and the <4 x i32> loaded from %a1 to <4 x i64> and
; adds the two results (the vpaddq in the expected output). Each CPU selected
; by the RUN lines at the top of the file has its own expected "sched: [a:b]"
; annotations.
; The ZNVER1 expectations list the memory form before the register form; the
; other CPUs list the register form first.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4284define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4285; GENERIC-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004286; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004287; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
4288; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [5:1.00]
4289; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4290; GENERIC-NEXT: retq # sched: [1:1.00]
4291;
4292; HASWELL-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004293; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004294; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004295; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004296; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004297; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004298;
Gadi Haber85d99b42017-10-17 13:45:39 +00004299; BROADWELL-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004300; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004301; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004302; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004303; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004304; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004305;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004306; SKYLAKE-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004307; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004308; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004309; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4310; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4311; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004312;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004313; SKX-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004314; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004315; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004316; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4317; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4318; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004319;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004320; ZNVER1-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004321; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004322; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50]
4323; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50]
4324; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4325; ZNVER1-NEXT: retq # sched: [1:0.50]
4326 %1 = sext <4 x i32> %a0 to <4 x i64>
4327 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4328 %3 = sext <4 x i32> %2 to <4 x i64>
4329 %4 = add <4 x i64> %1, %3
4330 ret <4 x i64> %4
4331}
4332
; test_pmovsxwd: scheduling-annotation test for vpmovsxwd in its register form
; (xmm0 source) and its memory form ((%rdi) source).
; The IR sign-extends %a0 and the <8 x i16> loaded from %a1 to <8 x i32> and
; adds the two results (the vpaddd in the expected output). Each CPU selected
; by the RUN lines at the top of the file has its own expected "sched: [a:b]"
; annotations.
; The ZNVER1 expectations list the memory form before the register form; the
; other CPUs list the register form first.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4333define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4334; GENERIC-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004335; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004336; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
4337; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [5:1.00]
4338; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4339; GENERIC-NEXT: retq # sched: [1:1.00]
4340;
4341; HASWELL-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004342; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004343; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004344; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004345; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004346; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004347;
Gadi Haber85d99b42017-10-17 13:45:39 +00004348; BROADWELL-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004349; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004350; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004351; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004352; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004353; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004354;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004355; SKYLAKE-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004356; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004357; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004358; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4359; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4360; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004361;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004362; SKX-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004363; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004364; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004365; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4366; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4367; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004368;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004369; ZNVER1-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004370; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004371; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50]
4372; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.25]
4373; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4374; ZNVER1-NEXT: retq # sched: [1:0.50]
4375 %1 = sext <8 x i16> %a0 to <8 x i32>
4376 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4377 %3 = sext <8 x i16> %2 to <8 x i32>
4378 %4 = add <8 x i32> %1, %3
4379 ret <8 x i32> %4
4380}
4381
; test_pmovsxwq: scheduling-annotation test for vpmovsxwq in its register form
; (xmm0 source) and its memory form ((%rdi) source).
; The IR takes elements 0-3 of %a0 and of the <8 x i16> loaded from %a1,
; sign-extends each to <4 x i64>, and adds the two results (the vpaddq in the
; expected output). Each CPU selected by the RUN lines at the top of the file
; has its own expected "sched: [a:b]" annotations.
; The ZNVER1 expectations list the memory form before the register form; the
; other CPUs list the register form first.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4382define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4383; GENERIC-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004384; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004385; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
4386; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [5:1.00]
4387; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4388; GENERIC-NEXT: retq # sched: [1:1.00]
4389;
4390; HASWELL-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004391; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004392; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004393; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004394; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004395; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004396;
Gadi Haber85d99b42017-10-17 13:45:39 +00004397; BROADWELL-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004398; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004399; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004400; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004401; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004402; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004403;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004404; SKYLAKE-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004405; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004406; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004407; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4408; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4409; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004410;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004411; SKX-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004412; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004413; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004414; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4415; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4416; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004417;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004418; ZNVER1-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004419; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004420; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50]
4421; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.25]
4422; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4423; ZNVER1-NEXT: retq # sched: [1:0.50]
4424 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4425 %2 = sext <4 x i16> %1 to <4 x i64>
4426 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4427 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4428 %5 = sext <4 x i16> %4 to <4 x i64>
4429 %6 = add <4 x i64> %2, %5
4430 ret <4 x i64> %6
4431}
4432
; test_pmovzxbd: scheduling-annotation test for vpmovzxbd in its register form
; (xmm0 source) and its memory form (mem source).
; The IR takes elements 0-7 of %a0 and of the <16 x i8> loaded from %a1,
; zero-extends each to <8 x i32>, and adds the two results (the vpaddd in the
; expected output). Each CPU selected by the RUN lines at the top of the file
; has its own expected "sched: [a:b]" annotations.
; The vpmovzxbd expectations match the element-layout text printed after the
; mnemonic ("ymmN = src[0],zero,zero,zero,..."); a {{.*#+}} regex skips
; whatever is printed between the mnemonic and that text.
; The ZNVER1 expectations list the memory form before the register form; the
; other CPUs list the register form first.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4433define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4434; GENERIC-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004435; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004436; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
4437; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
4438; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4439; GENERIC-NEXT: retq # sched: [1:1.00]
4440;
4441; HASWELL-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004442; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004443; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004444; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004445; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004446; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004447;
Gadi Haber85d99b42017-10-17 13:45:39 +00004448; BROADWELL-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004449; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004450; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004451; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004452; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004453; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004454;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004455; SKYLAKE-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004456; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004457; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004458; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4459; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4460; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004461;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004462; SKX-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004463; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004464; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004465; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4466; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4467; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004468;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004469; ZNVER1-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004470; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004471; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50]
4472; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.25]
4473; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4474; ZNVER1-NEXT: retq # sched: [1:0.50]
4475 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4476 %2 = zext <8 x i8> %1 to <8 x i32>
4477 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4478 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4479 %5 = zext <8 x i8> %4 to <8 x i32>
4480 %6 = add <8 x i32> %2, %5
4481 ret <8 x i32> %6
4482}
4483
4484define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4485; GENERIC-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004486; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004487; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
4488; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
4489; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4490; GENERIC-NEXT: retq # sched: [1:1.00]
4491;
4492; HASWELL-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004493; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004494; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004495; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004496; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004497; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004498;
Gadi Haber85d99b42017-10-17 13:45:39 +00004499; BROADWELL-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004500; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004501; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004502; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004503; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004504; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004505;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004506; SKYLAKE-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004507; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004508; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004509; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4510; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4511; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004512;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004513; SKX-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004514; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004515; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004516; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4517; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4518; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004519;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004520; ZNVER1-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004521; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004522; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
4523; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
4524; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4525; ZNVER1-NEXT: retq # sched: [1:0.50]
4526 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4527 %2 = zext <4 x i8> %1 to <4 x i64>
4528 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4529 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4530 %5 = zext <4 x i8> %4 to <4 x i64>
4531 %6 = add <4 x i64> %2, %5
4532 ret <4 x i64> %6
4533}
4534
; test_pmovzxbw - zero-extends <16 x i8> to <16 x i16> twice: once from the
; register argument %a0 and once from the vector loaded through %a1, then adds
; the two results. The assertions below expect a register-form and a
; memory-form vpmovzxbw followed by vpaddw, each with its sched annotation,
; for every -mcpu configuration exercised by this file.
4535define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4536; GENERIC-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004537; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004538; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4539; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
4540; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4541; GENERIC-NEXT: retq # sched: [1:1.00]
4542;
4543; HASWELL-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004544; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004545; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004546; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004547; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004548; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004549;
Gadi Haber85d99b42017-10-17 13:45:39 +00004550; BROADWELL-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004551; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004552; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004553; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004554; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004555; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004556;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004557; SKYLAKE-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004558; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004559; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004560; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4561; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4562; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004563;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004564; SKX-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004565; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004566; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004567; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4568; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4569; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004570;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004571; ZNVER1-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004572; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004573; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50]
4574; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50]
4575; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4576; ZNVER1-NEXT: retq # sched: [1:0.50]
4577 %1 = zext <16 x i8> %a0 to <16 x i16>
4578 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4579 %3 = zext <16 x i8> %2 to <16 x i16>
4580 %4 = add <16 x i16> %1, %3
4581 ret <16 x i16> %4
4582}
4583
; test_pmovzxdq - zero-extends <4 x i32> to <4 x i64> twice: once from the
; register argument %a0 and once from the vector loaded through %a1, then adds
; the two results. The assertions below expect a register-form and a
; memory-form vpmovzxdq followed by vpaddq, each with its sched annotation,
; for every -mcpu configuration exercised by this file.
4584define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4585; GENERIC-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004586; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004587; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
4588; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
4589; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4590; GENERIC-NEXT: retq # sched: [1:1.00]
4591;
4592; HASWELL-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004593; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004594; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004595; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004596; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004597; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004598;
Gadi Haber85d99b42017-10-17 13:45:39 +00004599; BROADWELL-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004600; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004601; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004602; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004603; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004604; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004605;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004606; SKYLAKE-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004607; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004608; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004609; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4610; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4611; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004612;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004613; SKX-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004614; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004615; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004616; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4617; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4618; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004619;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004620; ZNVER1-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004621; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004622; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
4623; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
4624; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4625; ZNVER1-NEXT: retq # sched: [1:0.50]
4626 %1 = zext <4 x i32> %a0 to <4 x i64>
4627 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4628 %3 = zext <4 x i32> %2 to <4 x i64>
4629 %4 = add <4 x i64> %1, %3
4630 ret <4 x i64> %4
4631}
4632
; test_pmovzxwd - zero-extends <8 x i16> to <8 x i32> twice: once from the
; register argument %a0 and once from the vector loaded through %a1, then adds
; the two results. The assertions below expect a register-form and a
; memory-form vpmovzxwd followed by vpaddd, each with its sched annotation,
; for every -mcpu configuration exercised by this file.
4633define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4634; GENERIC-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004635; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004636; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
4637; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
4638; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4639; GENERIC-NEXT: retq # sched: [1:1.00]
4640;
4641; HASWELL-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004642; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004643; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004644; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004645; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004646; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004647;
Gadi Haber85d99b42017-10-17 13:45:39 +00004648; BROADWELL-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004649; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004650; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004651; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004652; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004653; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004654;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004655; SKYLAKE-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004656; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004657; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004658; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4659; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4660; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004661;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004662; SKX-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004663; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004664; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004665; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4666; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4667; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004668;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004669; ZNVER1-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004670; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004671; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
4672; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
4673; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4674; ZNVER1-NEXT: retq # sched: [1:0.50]
4675 %1 = zext <8 x i16> %a0 to <8 x i32>
4676 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4677 %3 = zext <8 x i16> %2 to <8 x i32>
4678 %4 = add <8 x i32> %1, %3
4679 ret <8 x i32> %4
4680}
4681
; test_pmovzxwq - takes elements 0..3 of an <8 x i16> vector (via
; shufflevector) and zero-extends them to <4 x i64>, once for the register
; argument %a0 and once for the vector loaded through %a1, then adds the two
; results. The assertions below expect a register-form and a memory-form
; vpmovzxwq followed by vpaddq, each with its sched annotation, for every
; -mcpu configuration exercised by this file.
4682define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4683; GENERIC-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004684; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004685; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
4686; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
4687; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4688; GENERIC-NEXT: retq # sched: [1:1.00]
4689;
4690; HASWELL-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004691; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004692; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004693; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004694; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004695; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004696;
Gadi Haber85d99b42017-10-17 13:45:39 +00004697; BROADWELL-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004698; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004699; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004700; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004701; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004702; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004703;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004704; SKYLAKE-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004705; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004706; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004707; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4708; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4709; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004710;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004711; SKX-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004712; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004713; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004714; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4715; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4716; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004717;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004718; ZNVER1-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004719; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004720; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
4721; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
4722; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4723; ZNVER1-NEXT: retq # sched: [1:0.50]
4724 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4725 %2 = zext <4 x i16> %1 to <4 x i64>
4726 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4727 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4728 %5 = zext <4 x i16> %4 to <4 x i64>
4729 %6 = add <4 x i64> %2, %5
4730 ret <4 x i64> %6
4731}
4732
; test_pmuldq - calls the llvm.x86.avx2.pmul.dq intrinsic on the two register
; arguments, bitcasts the <4 x i64> result back to <8 x i32>, and calls the
; intrinsic again with the vector loaded through %a2. The assertions below
; expect a register-form vpmuldq followed by a memory-form vpmuldq (%rdi),
; each with its sched annotation, for every -mcpu configuration exercised by
; this file. The intrinsic is declared right after the function body.
4733define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4734; GENERIC-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004735; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004736; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4737; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4738; GENERIC-NEXT: retq # sched: [1:1.00]
4739;
4740; HASWELL-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004741; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004742; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004743; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4744; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004745;
Gadi Haber85d99b42017-10-17 13:45:39 +00004746; BROADWELL-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004747; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004748; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004749; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4750; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004751;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004752; SKYLAKE-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004753; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004754; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004755; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4756; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004757;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004758; SKX-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004759; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004760; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004761; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4762; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004763;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004764; ZNVER1-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004765; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004766; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4767; ZNVER1-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4768; ZNVER1-NEXT: retq # sched: [1:0.50]
4769 %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
4770 %2 = bitcast <4 x i64> %1 to <8 x i32>
4771 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
4772 %4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3)
4773 ret <4 x i64> %4
4774}
4775declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
4776
; test_pmulhrsw - calls the llvm.x86.avx2.pmul.hr.sw intrinsic on the two
; register arguments, then calls it again on that result and the vector loaded
; through %a2. The assertions below expect a register-form vpmulhrsw followed
; by a memory-form vpmulhrsw (%rdi), each with its sched annotation, for every
; -mcpu configuration exercised by this file. The intrinsic is declared right
; after the function body.
4777define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4778; GENERIC-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004779; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004780; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4781; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4782; GENERIC-NEXT: retq # sched: [1:1.00]
4783;
4784; HASWELL-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004785; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004786; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004787; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4788; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004789;
Gadi Haber85d99b42017-10-17 13:45:39 +00004790; BROADWELL-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004791; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004792; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004793; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4794; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004795;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004796; SKYLAKE-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004797; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004798; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004799; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4800; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004801;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004802; SKX-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004803; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004804; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004805; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4806; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004807;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004808; ZNVER1-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004809; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004810; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4811; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4812; ZNVER1-NEXT: retq # sched: [1:0.50]
4813 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
4814 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4815 %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2)
4816 ret <16 x i16> %3
4817}
4818declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
4819
; test_pmulhuw - calls the llvm.x86.avx2.pmulhu.w intrinsic on the two
; register arguments, then calls it again on that result and the vector loaded
; through %a2. The assertions below expect a register-form vpmulhuw followed
; by a memory-form vpmulhuw (%rdi), each with its sched annotation, for every
; -mcpu configuration exercised by this file. The intrinsic is declared right
; after the function body.
4820define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4821; GENERIC-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004822; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004823; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4824; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4825; GENERIC-NEXT: retq # sched: [1:1.00]
4826;
4827; HASWELL-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004828; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004829; HASWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004830; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4831; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004832;
Gadi Haber85d99b42017-10-17 13:45:39 +00004833; BROADWELL-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004834; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004835; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004836; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4837; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004838;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004839; SKYLAKE-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004840; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004841; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004842; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4843; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004844;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004845; SKX-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004846; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004847; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004848; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4849; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004850;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004851; ZNVER1-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004852; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004853; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4854; ZNVER1-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4855; ZNVER1-NEXT: retq # sched: [1:0.50]
4856 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
4857 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4858 %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2)
4859 ret <16 x i16> %3
4860}
4861declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
4862
; test_pmulhw - calls the llvm.x86.avx2.pmulh.w intrinsic on the two register
; arguments, then calls it again on that result and the vector loaded through
; %a2. The assertions below expect a register-form vpmulhw followed by a
; memory-form vpmulhw (%rdi), each with its sched annotation, for every -mcpu
; configuration exercised by this file. The intrinsic is declared right after
; the function body.
4863define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4864; GENERIC-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004865; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004866; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4867; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4868; GENERIC-NEXT: retq # sched: [1:1.00]
4869;
4870; HASWELL-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004871; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004872; HASWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004873; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4874; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004875;
Gadi Haber85d99b42017-10-17 13:45:39 +00004876; BROADWELL-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004877; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004878; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004879; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4880; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004881;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004882; SKYLAKE-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004883; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004884; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004885; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4886; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004887;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004888; SKX-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004889; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004890; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004891; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4892; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004893;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004894; ZNVER1-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004895; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004896; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4897; ZNVER1-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4898; ZNVER1-NEXT: retq # sched: [1:0.50]
4899 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
4900 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4901 %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2)
4902 ret <16 x i16> %3
4903}
4904declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
4905
; vpmulld: two <8 x i32> multiplies, the second taking its operand from a load
; of %a2. The assertions pin the sched annotation printed for each -mcpu
; configuration in the RUN lines.
define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmulld:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmulld:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
; HASWELL-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmulld:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
; BROADWELL-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmulld:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
; SKYLAKE-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulld:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
; SKX-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmulld:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; ZNVER1-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = mul <8 x i32> %a0, %a1
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = mul <8 x i32> %1, %2
  ret <8 x i32> %3
}
4947
; vpmullw: two <16 x i16> multiplies, the second taking its operand from a load
; of %a2. The assertions pin the sched annotation printed for each -mcpu
; configuration in the RUN lines.
define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pmullw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmullw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmullw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; BROADWELL-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmullw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKYLAKE-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmullw:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmullw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = mul <16 x i16> %a0, %a1
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = mul <16 x i16> %1, %2
  ret <16 x i16> %3
}
4989
; vpmuludq: a register-register form followed by a form whose second operand is
; loaded from %a2. The assertions pin the sched annotation printed for each
; -mcpu configuration in the RUN lines.
define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmuludq:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmuludq:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmuludq:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; BROADWELL-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmuludq:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKYLAKE-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmuludq:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmuludq:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
  ; The intrinsic returns <4 x i64>; cast back so it can feed the second call.
  %2 = bitcast <4 x i64> %1 to <8 x i32>
  %3 = load <8 x i32>, <8 x i32> *%a2, align 32
  %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3)
  ret <4 x i64> %4
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
5033
; vpor: a register-register form followed by a form whose second operand is
; loaded from %a2, then a vpaddq. The assertions pin the sched annotation
; printed for each -mcpu configuration in the RUN lines.
define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_por:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_por:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_por:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; BROADWELL-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_por:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_por:
; SKX:       # %bb.0:
; SKX-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_por:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = or <4 x i64> %a0, %a1
  %2 = load <4 x i64>, <4 x i64> *%a2, align 32
  %3 = or <4 x i64> %1, %2
  ; NOTE(review): the trailing add presumably keeps the sequence in the integer
  ; domain so vpor is selected rather than a floating-point OR - confirm.
  %4 = add <4 x i64> %3, %a1
  ret <4 x i64> %4
}
5082
; vpsadbw: a register-register form followed by a form whose second operand is
; loaded from %a2. The assertions pin the sched annotation printed for each
; -mcpu configuration in the RUN lines.
define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psadbw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psadbw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_psadbw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; BROADWELL-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_psadbw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_psadbw:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_psadbw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
  ; The intrinsic returns <4 x i64>; cast back so it can feed the second call.
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = load <32 x i8>, <32 x i8> *%a2, align 32
  %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3)
  ret <4 x i64> %4
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
5126
; vpshufb: a register-register form followed by a form whose second operand is
; loaded from %a2. The assertions pin the sched annotation printed for each
; -mcpu configuration in the RUN lines.
define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pshufb:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufb:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pshufb:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; BROADWELL-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pshufb:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pshufb:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pshufb:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
  %2 = load <32 x i8>, <32 x i8> *%a2, align 32
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2)
  ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
5169
; vpshufd: one shuffle of the register argument and one shuffle of a vector
; loaded from %a1; an add combines the two results. The assertions pin the
; sched annotation printed for each -mcpu configuration in the RUN lines.
define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) {
; GENERIC-LABEL: test_pshufd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [5:1.00]
; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; HASWELL-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
; HASWELL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pshufd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; BROADWELL-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00]
; BROADWELL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pshufd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; SKYLAKE-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
; SKYLAKE-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pshufd:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
; SKX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pshufd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50]
; ZNVER1-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25]
; ZNVER1-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %2 = load <8 x i32>, <8 x i32> *%a1, align 32
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %4 = add <8 x i32> %1, %3
  ret <8 x i32> %4
}
5218
; vpshufhw: one shuffle of the register argument and one shuffle of a vector
; loaded from %a1; an or combines the two results. The assertions pin the
; sched annotation printed for each -mcpu configuration in the RUN lines.
define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) {
; GENERIC-LABEL: test_pshufhw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [5:1.00]
; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufhw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
; HASWELL-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
; HASWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pshufhw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
; BROADWELL-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00]
; BROADWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pshufhw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
; SKYLAKE-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
; SKYLAKE-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pshufhw:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
; SKX-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pshufhw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50]
; ZNVER1-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25]
; ZNVER1-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
  %2 = load <16 x i16>, <16 x i16> *%a1, align 32
  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
  %4 = or <16 x i16> %1, %3
  ret <16 x i16> %4
}
5267
; vpshuflw: one shuffle of the register argument and one shuffle of a vector
; loaded from %a1; an or combines the two results. The assertions pin the
; sched annotation printed for each -mcpu configuration in the RUN lines.
define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) {
; GENERIC-LABEL: test_pshuflw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [5:1.00]
; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshuflw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
; HASWELL-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
; HASWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pshuflw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
; BROADWELL-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00]
; BROADWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pshuflw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
; SKYLAKE-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
; SKYLAKE-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pshuflw:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
; SKX-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pshuflw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50]
; ZNVER1-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25]
; ZNVER1-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
  %2 = load <16 x i16>, <16 x i16> *%a1, align 32
  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
  %4 = or <16 x i16> %1, %3
  ret <16 x i16> %4
}
5316
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005317define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5318; GENERIC-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005319; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005320; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5321; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5322; GENERIC-NEXT: retq # sched: [1:1.00]
5323;
5324; HASWELL-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005325; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005326; HASWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005327; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5328; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005329;
Gadi Haber85d99b42017-10-17 13:45:39 +00005330; BROADWELL-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005331; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005332; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005333; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5334; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005335;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005336; SKYLAKE-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005337; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005338; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5339; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5340; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005341;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005342; SKX-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005343; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005344; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5345; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5346; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005347;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005348; ZNVER1-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005349; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005350; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5351; ZNVER1-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5352; ZNVER1-NEXT: retq # sched: [1:0.50]
5353 %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
5354 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5355 %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2)
5356 ret <32 x i8> %3
5357}
5358declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
5359
5360define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5361; GENERIC-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005362; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005363; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5364; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5365; GENERIC-NEXT: retq # sched: [1:1.00]
5366;
5367; HASWELL-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005368; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005369; HASWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005370; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5371; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005372;
Gadi Haber85d99b42017-10-17 13:45:39 +00005373; BROADWELL-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005374; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005375; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005376; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5377; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005378;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005379; SKYLAKE-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005380; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005381; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5382; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5383; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005384;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005385; SKX-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005386; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005387; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5388; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5389; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005390;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005391; ZNVER1-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005392; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005393; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5394; ZNVER1-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5395; ZNVER1-NEXT: retq # sched: [1:0.50]
5396 %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
5397 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5398 %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2)
5399 ret <8 x i32> %3
5400}
5401declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
5402
5403define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
5404; GENERIC-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005405; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005406; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5407; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5408; GENERIC-NEXT: retq # sched: [1:1.00]
5409;
5410; HASWELL-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005411; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005412; HASWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005413; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5414; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005415;
Gadi Haber85d99b42017-10-17 13:45:39 +00005416; BROADWELL-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005417; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005418; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005419; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5420; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005421;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005422; SKYLAKE-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005423; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005424; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5425; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5426; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005427;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005428; SKX-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005429; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005430; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5431; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5432; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005433;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005434; ZNVER1-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005435; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005436; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5437; ZNVER1-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5438; ZNVER1-NEXT: retq # sched: [1:0.50]
5439 %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
5440 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
5441 %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2)
5442 ret <16 x i16> %3
5443}
5444declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
5445
5446define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5447; GENERIC-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005448; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005449; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5450; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5451; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5452; GENERIC-NEXT: retq # sched: [1:1.00]
5453;
5454; HASWELL-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005455; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005456; HASWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005457; HASWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005458; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005459; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005460;
Gadi Haber85d99b42017-10-17 13:45:39 +00005461; BROADWELL-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005462; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005463; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005464; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005465; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005466; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005467;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005468; SKYLAKE-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005469; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005470; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005471; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5472; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5473; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005474;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005475; SKX-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005476; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005477; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005478; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5479; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5480; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005481;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005482; ZNVER1-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005483; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005484; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5485; ZNVER1-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5486; ZNVER1-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.25]
5487; ZNVER1-NEXT: retq # sched: [1:0.50]
5488 %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
5489 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5490 %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2)
5491 %4 = shl <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5492 ret <8 x i32> %4
5493}
5494declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
5495
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005496define <32 x i8> @test_pslldq(<32 x i8> %a0) {
5497; GENERIC-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005498; GENERIC: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005499; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5500; GENERIC-NEXT: retq # sched: [1:1.00]
5501;
5502; HASWELL-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005503; HASWELL: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005504; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005505; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005506;
Gadi Haber85d99b42017-10-17 13:45:39 +00005507; BROADWELL-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005508; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005509; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005510; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005511;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005512; SKYLAKE-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005513; SKYLAKE: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005514; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005515; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005516;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005517; SKX-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005518; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005519; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005520; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005521;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005522; ZNVER1-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005523; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005524; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00]
5525; ZNVER1-NEXT: retq # sched: [1:0.50]
5526 %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
5527 ret <32 x i8> %1
5528}
5529
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005530define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5531; GENERIC-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005532; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005533; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5534; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5535; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5536; GENERIC-NEXT: retq # sched: [1:1.00]
5537;
5538; HASWELL-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005539; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005540; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005541; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005542; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005543; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005544;
Gadi Haber85d99b42017-10-17 13:45:39 +00005545; BROADWELL-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005546; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005547; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005548; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005549; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005550; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005551;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005552; SKYLAKE-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005553; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005554; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005555; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5556; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5557; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005558;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005559; SKX-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005560; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005561; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005562; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5563; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5564; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005565;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005566; ZNVER1-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005567; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005568; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5569; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5570; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25]
5571; ZNVER1-NEXT: retq # sched: [1:0.50]
5572 %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
5573 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5574 %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2)
5575 %4 = shl <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
5576 ret <4 x i64> %4
5577}
5578declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
5579
5580define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5581; GENERIC-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005582; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005583; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5584; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5585; GENERIC-NEXT: retq # sched: [1:1.00]
5586;
5587; HASWELL-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005588; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005589; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005590; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
5591; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005592;
Gadi Haber85d99b42017-10-17 13:45:39 +00005593; BROADWELL-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005594; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005595; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005596; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5597; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005598;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005599; SKYLAKE-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005600; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005601; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5602; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5603; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005604;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005605; SKX-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005606; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005607; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5608; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5609; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005610;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005611; ZNVER1-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005612; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005613; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5614; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5615; ZNVER1-NEXT: retq # sched: [1:0.50]
5616 %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
5617 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5618 %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2)
5619 ret <4 x i32> %3
5620}
5621declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
5622
5623define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5624; GENERIC-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005625; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005626; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5627; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5628; GENERIC-NEXT: retq # sched: [1:1.00]
5629;
5630; HASWELL-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005631; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005632; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005633; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
5634; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005635;
Gadi Haber85d99b42017-10-17 13:45:39 +00005636; BROADWELL-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005637; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005638; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005639; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5640; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005641;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005642; SKYLAKE-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005643; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005644; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5645; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5646; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005647;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005648; SKX-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005649; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005650; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5651; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5652; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005653;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005654; ZNVER1-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005655; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005656; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5657; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5658; ZNVER1-NEXT: retq # sched: [1:0.50]
5659 %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
5660 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5661 %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2)
5662 ret <8 x i32> %3
5663}
5664declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
5665
5666define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5667; GENERIC-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005668; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005669; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5670; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5671; GENERIC-NEXT: retq # sched: [1:1.00]
5672;
5673; HASWELL-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005674; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005675; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005676; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
5677; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005678;
Gadi Haber85d99b42017-10-17 13:45:39 +00005679; BROADWELL-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005680; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005681; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005682; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
5683; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005684;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005685; SKYLAKE-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005686; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005687; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5688; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5689; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005690;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005691; SKX-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005692; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005693; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5694; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5695; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005696;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005697; ZNVER1-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005698; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005699; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5700; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5701; ZNVER1-NEXT: retq # sched: [1:0.50]
5702 %1 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
5703 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5704 %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2)
5705 ret <2 x i64> %3
5706}
5707declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
5708
5709define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5710; GENERIC-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005711; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005712; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5713; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5714; GENERIC-NEXT: retq # sched: [1:1.00]
5715;
5716; HASWELL-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005717; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005718; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005719; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5720; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005721;
Gadi Haber85d99b42017-10-17 13:45:39 +00005722; BROADWELL-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005723; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005724; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005725; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5726; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005727;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005728; SKYLAKE-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005729; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005730; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5731; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5732; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005733;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005734; SKX-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005735; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005736; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5737; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5738; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005739;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005740; ZNVER1-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005741; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005742; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5743; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5744; ZNVER1-NEXT: retq # sched: [1:0.50]
5745 %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
5746 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5747 %3 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2)
5748 ret <4 x i64> %3
5749}
5750declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
5751
; test_psllw - scheduling-annotation test for vpsllw on <16 x i16> (ymm) data.
; The IR calls @llvm.x86.avx2.psll.w with the count in a register (%a1), calls
; it again with a count loaded from %a2, then applies a constant `shl` by 2.
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect, in that
; order, the register form, the memory form ((%rdi)) and the immediate form ($2).
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
5752define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5753; GENERIC-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005754; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005755; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5756; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5757; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5758; GENERIC-NEXT: retq # sched: [1:1.00]
5759;
5760; HASWELL-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005761; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005762; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005763; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005764; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005765; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005766;
Gadi Haber85d99b42017-10-17 13:45:39 +00005767; BROADWELL-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005768; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005769; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005770; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005771; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005772; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005773;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005774; SKYLAKE-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005775; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005776; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005777; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5778; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5779; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005780;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005781; SKX-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005782; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005783; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005784; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5785; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5786; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005787;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005788; ZNVER1-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005789; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005790; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5791; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5792; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25]
5793; ZNVER1-NEXT: retq # sched: [1:0.50]
5794 %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
5795 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5796 %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2)
5797 %4 = shl <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
5798 ret <16 x i16> %4
5799}
5800declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
5801
; test_psrad - scheduling-annotation test for vpsrad on <8 x i32> (ymm) data.
; The IR calls @llvm.x86.avx2.psra.d with the count in a register (%a1), calls
; it again with a count loaded from %a2, then applies a constant `ashr` by 2.
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect, in that
; order, the register form, the memory form ((%rdi)) and the immediate form ($2).
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
5802define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5803; GENERIC-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005804; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005805; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5806; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5807; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5808; GENERIC-NEXT: retq # sched: [1:1.00]
5809;
5810; HASWELL-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005811; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005812; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005813; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005814; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005815; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005816;
Gadi Haber85d99b42017-10-17 13:45:39 +00005817; BROADWELL-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005818; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005819; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005820; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005821; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005822; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005823;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005824; SKYLAKE-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005825; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005826; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005827; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5828; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5829; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005830;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005831; SKX-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005832; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005833; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005834; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5835; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5836; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005837;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005838; ZNVER1-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005839; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005840; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5841; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5842; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25]
5843; ZNVER1-NEXT: retq # sched: [1:0.50]
5844 %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
5845 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5846 %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2)
5847 %4 = ashr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5848 ret <8 x i32> %4
5849}
5850declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
5851
; test_psravd - scheduling-annotation test for vpsravd on <4 x i32> (xmm) data.
; The IR calls @llvm.x86.avx2.psrav.d with both operands in registers, then
; calls it again with the second operand loaded from %a2.
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect the
; register form followed by the memory form ((%rdi)).
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
5852define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5853; GENERIC-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005854; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005855; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5856; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5857; GENERIC-NEXT: retq # sched: [1:1.00]
5858;
5859; HASWELL-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005860; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005861; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005862; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
5863; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005864;
Gadi Haber85d99b42017-10-17 13:45:39 +00005865; BROADWELL-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005866; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005867; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005868; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5869; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005870;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005871; SKYLAKE-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005872; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005873; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5874; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5875; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005876;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005877; SKX-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005878; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005879; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5880; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5881; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005882;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005883; ZNVER1-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005884; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005885; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5886; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5887; ZNVER1-NEXT: retq # sched: [1:0.50]
5888 %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
5889 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5890 %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2)
5891 ret <4 x i32> %3
5892}
5893declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
5894
; test_psravd_ymm - scheduling-annotation test for vpsravd on <8 x i32> (ymm)
; data. The IR calls @llvm.x86.avx2.psrav.d.256 with both operands in
; registers, then calls it again with the second operand loaded from %a2.
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect the
; register form followed by the memory form ((%rdi)).
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
5895define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5896; GENERIC-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005897; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005898; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5899; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5900; GENERIC-NEXT: retq # sched: [1:1.00]
5901;
5902; HASWELL-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005903; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005904; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005905; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
5906; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005907;
Gadi Haber85d99b42017-10-17 13:45:39 +00005908; BROADWELL-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005909; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005910; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005911; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5912; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005913;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005914; SKYLAKE-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005915; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005916; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5917; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5918; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005919;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005920; SKX-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005921; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005922; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5923; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5924; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005925;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005926; ZNVER1-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005927; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005928; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5929; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5930; ZNVER1-NEXT: retq # sched: [1:0.50]
5931 %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
5932 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5933 %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2)
5934 ret <8 x i32> %3
5935}
5936declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
5937
; test_psraw - scheduling-annotation test for vpsraw on <16 x i16> (ymm) data.
; The IR calls @llvm.x86.avx2.psra.w with the count in a register (%a1), calls
; it again with a count loaded from %a2, then applies a constant `ashr` by 2.
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect, in that
; order, the register form, the memory form ((%rdi)) and the immediate form ($2).
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
5938define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5939; GENERIC-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005940; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005941; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5942; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5943; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5944; GENERIC-NEXT: retq # sched: [1:1.00]
5945;
5946; HASWELL-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005947; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005948; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005949; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005950; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005951; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005952;
Gadi Haber85d99b42017-10-17 13:45:39 +00005953; BROADWELL-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005954; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005955; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005956; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005957; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005958; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005959;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005960; SKYLAKE-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005961; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005962; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005963; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5964; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5965; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005966;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005967; SKX-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005968; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005969; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005970; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5971; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5972; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005973;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005974; ZNVER1-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005975; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005976; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5977; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5978; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25]
5979; ZNVER1-NEXT: retq # sched: [1:0.50]
5980 %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
5981 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5982 %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2)
5983 %4 = ashr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
5984 ret <16 x i16> %4
5985}
5986declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
5987
; test_psrld - scheduling-annotation test for vpsrld on <8 x i32> (ymm) data.
; The IR calls @llvm.x86.avx2.psrl.d with the count in a register (%a1), calls
; it again with a count loaded from %a2, then applies a constant `lshr` by 2.
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect, in that
; order, the register form, the memory form ((%rdi)) and the immediate form ($2).
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
5988define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5989; GENERIC-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005990; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005991; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5992; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5993; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
5994; GENERIC-NEXT: retq # sched: [1:1.00]
5995;
5996; HASWELL-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005997; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005998; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005999; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006000; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006001; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006002;
Gadi Haber85d99b42017-10-17 13:45:39 +00006003; BROADWELL-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006004; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006005; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006006; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006007; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006008; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006009;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006010; SKYLAKE-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006011; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006012; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006013; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6014; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6015; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006016;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006017; SKX-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006018; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006019; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006020; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6021; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6022; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006023;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006024; ZNVER1-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006025; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006026; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6027; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6028; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25]
6029; ZNVER1-NEXT: retq # sched: [1:0.50]
6030 %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
6031 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6032 %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2)
6033 %4 = lshr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
6034 ret <8 x i32> %4
6035}
6036declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
6037
; test_psrldq - scheduling-annotation test for vpsrldq on <32 x i8> (ymm) data.
; The IR is a single shufflevector of %a0 with a zero vector: for each 16-byte
; half it selects bytes 3..15 (resp. 19..31) of %a0 followed by three elements
; of the zero vector (mask indices 32-34 and 48-50).
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect that
; shuffle to be emitted as one vpsrldq; there is no memory-operand variant here.
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006038define <32 x i8> @test_psrldq(<32 x i8> %a0) {
6039; GENERIC-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006040; GENERIC: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006041; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6042; GENERIC-NEXT: retq # sched: [1:1.00]
6043;
6044; HASWELL-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006045; HASWELL: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006046; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006047; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006048;
Gadi Haber85d99b42017-10-17 13:45:39 +00006049; BROADWELL-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006050; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006051; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006052; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006053;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006054; SKYLAKE-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006055; SKYLAKE: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006056; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006057; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006058;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006059; SKX-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006060; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006061; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006062; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006063;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006064; ZNVER1-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006065; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006066; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00]
6067; ZNVER1-NEXT: retq # sched: [1:0.50]
6068 %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
6069 ret <32 x i8> %1
6070}
6071
; test_psrlq - scheduling-annotation test for vpsrlq on <4 x i64> (ymm) data.
; The IR calls @llvm.x86.avx2.psrl.q with the count in a register (%a1), calls
; it again with a count loaded from %a2, then applies a constant `lshr` by 2.
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect, in that
; order, the register form, the memory form ((%rdi)) and the immediate form ($2).
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006072define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6073; GENERIC-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006074; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006075; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
6076; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6077; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6078; GENERIC-NEXT: retq # sched: [1:1.00]
6079;
6080; HASWELL-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006081; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006082; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006083; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006084; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006085; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006086;
Gadi Haber85d99b42017-10-17 13:45:39 +00006087; BROADWELL-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006088; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006089; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006090; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006091; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006092; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006093;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006094; SKYLAKE-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006095; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006096; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006097; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6098; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6099; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006100;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006101; SKX-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006102; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006103; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006104; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6105; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6106; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006107;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006108; ZNVER1-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006109; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006110; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6111; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6112; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25]
6113; ZNVER1-NEXT: retq # sched: [1:0.50]
6114 %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
6115 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6116 %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2)
6117 %4 = lshr <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
6118 ret <4 x i64> %4
6119}
6120declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
6121
; test_psrlvd - scheduling-annotation test for vpsrlvd on <4 x i32> (xmm) data.
; The IR calls @llvm.x86.avx2.psrlv.d with both operands in registers, then
; calls it again with the second operand loaded from %a2.
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect the
; register form followed by the memory form ((%rdi)).
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
6122define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
6123; GENERIC-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006124; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006125; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6126; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
6127; GENERIC-NEXT: retq # sched: [1:1.00]
6128;
6129; HASWELL-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006130; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006131; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006132; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
6133; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006134;
Gadi Haber85d99b42017-10-17 13:45:39 +00006135; BROADWELL-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006136; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006137; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006138; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
6139; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006140;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006141; SKYLAKE-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006142; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006143; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6144; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6145; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006146;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006147; SKX-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006148; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006149; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6150; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6151; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006152;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006153; ZNVER1-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006154; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006155; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6156; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6157; ZNVER1-NEXT: retq # sched: [1:0.50]
6158 %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
6159 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6160 %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2)
6161 ret <4 x i32> %3
6162}
6163declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
6164
; test_psrlvd_ymm - scheduling-annotation test for vpsrlvd on <8 x i32> (ymm)
; data. The IR calls @llvm.x86.avx2.psrlv.d.256 with both operands in
; registers, then calls it again with the second operand loaded from %a2.
; The CHECK groups (one per RUN prefix, GENERIC through ZNVER1) expect the
; register form followed by the memory form ((%rdi)).
; Each expected instruction carries the `sched: [N:M]` note printed under
; -print-schedule (presumably latency and reciprocal throughput - TODO confirm).
6165define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6166; GENERIC-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006167; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006168; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6169; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6170; GENERIC-NEXT: retq # sched: [1:1.00]
6171;
6172; HASWELL-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006173; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006174; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006175; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
6176; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006177;
Gadi Haber85d99b42017-10-17 13:45:39 +00006178; BROADWELL-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006179; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006180; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006181; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
6182; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006183;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006184; SKYLAKE-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006185; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006186; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6187; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6188; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006189;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006190; SKX-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006191; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006192; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6193; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6194; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006195;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006196; ZNVER1-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006197; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006198; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6199; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6200; ZNVER1-NEXT: retq # sched: [1:0.50]
6201 %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
6202 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6203 %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2)
6204 ret <8 x i32> %3
6205}
6206declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
6207
; test_psrlvq -- chains two @llvm.x86.avx2.psrlv.q calls on <2 x i64>. The
; first takes %a1 directly and the second takes a value loaded from %a2, so the
; checks below show vpsrlvq once with an xmm register operand and once with a
; (%rdi) memory operand, each with its per-CPU sched annotation.
; The check lines are generated output (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
6208define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6209; GENERIC-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006210; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006211; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6212; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
6213; GENERIC-NEXT: retq # sched: [1:1.00]
6214;
6215; HASWELL-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006216; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006217; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006218; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
6219; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006220;
Gadi Haber85d99b42017-10-17 13:45:39 +00006221; BROADWELL-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006222; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006223; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006224; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
6225; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006226;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006227; SKYLAKE-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006228; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006229; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6230; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6231; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006232;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006233; SKX-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006234; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006235; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6236; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6237; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006238;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006239; ZNVER1-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006240; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006241; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6242; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6243; ZNVER1-NEXT: retq # sched: [1:0.50]
6244 %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
6245 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6246 %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2)
6247 ret <2 x i64> %3
6248}
6249declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
6250
; test_psrlvq_ymm -- the <4 x i64> counterpart of the xmm variable-shift test:
; two chained @llvm.x86.avx2.psrlv.q.256 calls, the first with %a1 and the
; second with a value loaded from %a2 (align 32). The checks below show vpsrlvq
; on ymm registers with a register operand and then a (%rdi) memory operand.
; The check lines are generated output; regenerate rather than hand-edit.
6251define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6252; GENERIC-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006253; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006254; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6255; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6256; GENERIC-NEXT: retq # sched: [1:1.00]
6257;
6258; HASWELL-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006259; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006260; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006261; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
6262; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006263;
Gadi Haber85d99b42017-10-17 13:45:39 +00006264; BROADWELL-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006265; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006266; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006267; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6268; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006269;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006270; SKYLAKE-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006271; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006272; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6273; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6274; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006275;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006276; SKX-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006277; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006278; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6279; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6280; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006281;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006282; ZNVER1-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006283; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006284; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6285; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6286; ZNVER1-NEXT: retq # sched: [1:0.50]
6287 %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
6288 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6289 %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2)
6290 ret <4 x i64> %3
6291}
6292declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
6293
; test_psrlw -- exercises three operand forms of vpsrlw on <16 x i16>:
;   1. @llvm.x86.avx2.psrl.w with the <8 x i16> count passed in %a1 (xmm reg),
;   2. the same intrinsic with the count loaded from %a2 ((%rdi) memory form),
;   3. a plain lshr by a splat of 2, which appears below as the $2 immediate.
; Unlike the variable-shift tests, the count operand is <8 x i16> while the
; shifted value is <16 x i16>.
; The check lines are generated output; regenerate rather than hand-edit.
6294define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
6295; GENERIC-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006296; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006297; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
6298; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6299; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6300; GENERIC-NEXT: retq # sched: [1:1.00]
6301;
6302; HASWELL-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006303; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006304; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006305; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006306; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006307; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006308;
Gadi Haber85d99b42017-10-17 13:45:39 +00006309; BROADWELL-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006310; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006311; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006312; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006313; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006314; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006315;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006316; SKYLAKE-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006317; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006318; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006319; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6320; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6321; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006322;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006323; SKX-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006324; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006325; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006326; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6327; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6328; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006329;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006330; ZNVER1-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006331; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006332; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6333; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6334; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25]
6335; ZNVER1-NEXT: retq # sched: [1:0.50]
6336 %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
6337 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
6338 %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2)
6339 %4 = lshr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
6340 ret <16 x i16> %4
6341}
6342declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
6343
; test_psubb -- two chained generic `sub <32 x i8>` instructions (no intrinsic):
; %a0 - %a1, then that result minus a vector loaded from %a2 (align 32). The
; checks below show vpsubb with a ymm register operand and then with a (%rdi)
; memory operand, each with its per-CPU sched annotation.
; The check lines are generated output; regenerate rather than hand-edit.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006344define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006345; GENERIC-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006346; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00006347; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6348; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6349; GENERIC-NEXT: retq # sched: [1:1.00]
6350;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006351; HASWELL-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006352; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006353; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006354; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6355; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006356;
Gadi Haber85d99b42017-10-17 13:45:39 +00006357; BROADWELL-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006358; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006359; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006360; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6361; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006362;
Gadi Haber767d98b2017-08-30 08:08:50 +00006363; SKYLAKE-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006364; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006365; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6366; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6367; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006368;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006369; SKX-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006370; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006371; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6372; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6373; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006374;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006375; ZNVER1-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006376; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00006377; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6378; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006379; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006380 %1 = sub <32 x i8> %a0, %a1
6381 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6382 %3 = sub <32 x i8> %1, %2
6383 ret <32 x i8> %3
6384}
6385
; test_psubd -- two chained generic `sub <8 x i32>` instructions: %a0 - %a1,
; then that result minus a vector loaded from %a2 (align 32). The checks below
; show vpsubd with a ymm register operand and then with a (%rdi) memory
; operand, each with its per-CPU sched annotation.
; The check lines are generated output; regenerate rather than hand-edit.
6386define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006387; GENERIC-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006388; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00006389; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6390; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6391; GENERIC-NEXT: retq # sched: [1:1.00]
6392;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006393; HASWELL-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006394; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006395; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006396; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6397; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006398;
Gadi Haber85d99b42017-10-17 13:45:39 +00006399; BROADWELL-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006400; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006401; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006402; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6403; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006404;
Gadi Haber767d98b2017-08-30 08:08:50 +00006405; SKYLAKE-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006406; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006407; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6408; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6409; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006410;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006411; SKX-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006412; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006413; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6414; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6415; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006416;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006417; ZNVER1-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006418; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00006419; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6420; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006421; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006422 %1 = sub <8 x i32> %a0, %a1
6423 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6424 %3 = sub <8 x i32> %1, %2
6425 ret <8 x i32> %3
6426}
6427
; test_psubq -- two chained generic `sub <4 x i64>` instructions: %a0 - %a1,
; then that result minus a vector loaded from %a2 (align 32). The checks below
; show vpsubq with a ymm register operand and then with a (%rdi) memory
; operand, each with its per-CPU sched annotation.
; The check lines are generated output; regenerate rather than hand-edit.
6428define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006429; GENERIC-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006430; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00006431; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6432; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6433; GENERIC-NEXT: retq # sched: [1:1.00]
6434;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006435; HASWELL-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006436; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006437; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006438; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6439; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006440;
Gadi Haber85d99b42017-10-17 13:45:39 +00006441; BROADWELL-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006442; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006443; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006444; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6445; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006446;
Gadi Haber767d98b2017-08-30 08:08:50 +00006447; SKYLAKE-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006448; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006449; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6450; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6451; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006452;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006453; SKX-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006454; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006455; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6456; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6457; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006458;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006459; ZNVER1-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006460; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00006461; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6462; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006463; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006464 %1 = sub <4 x i64> %a0, %a1
6465 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6466 %3 = sub <4 x i64> %1, %2
6467 ret <4 x i64> %3
6468}
6469
; test_psubsb -- two chained @llvm.x86.avx2.psubs.b calls on <32 x i8>. The
; first takes %a1 directly and the second takes a vector loaded from %a2
; (align 32), so the checks below show vpsubsb with a ymm register operand and
; then with a (%rdi) memory operand, each with its per-CPU sched annotation.
; The check lines are generated output; regenerate rather than hand-edit.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006470define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6471; GENERIC-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006472; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006473; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6474; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6475; GENERIC-NEXT: retq # sched: [1:1.00]
6476;
6477; HASWELL-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006478; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006479; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006480; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6481; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006482;
Gadi Haber85d99b42017-10-17 13:45:39 +00006483; BROADWELL-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006484; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006485; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006486; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6487; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006488;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006489; SKYLAKE-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006490; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006491; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6492; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6493; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006494;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006495; SKX-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006496; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006497; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6498; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6499; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006500;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006501; ZNVER1-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006502; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006503; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6504; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6505; ZNVER1-NEXT: retq # sched: [1:0.50]
6506 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1)
6507 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6508 %3 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %1, <32 x i8> %2)
6509 ret <32 x i8> %3
6510}
6511declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
6512
; test_psubsw -- two chained @llvm.x86.avx2.psubs.w calls on <16 x i16>. The
; first takes %a1 directly and the second takes a vector loaded from %a2
; (align 32), so the checks below show vpsubsw with a ymm register operand and
; then with a (%rdi) memory operand, each with its per-CPU sched annotation.
; The check lines are generated output; regenerate rather than hand-edit.
6513define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6514; GENERIC-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006515; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006516; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6517; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6518; GENERIC-NEXT: retq # sched: [1:1.00]
6519;
6520; HASWELL-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006521; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006522; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006523; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6524; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006525;
Gadi Haber85d99b42017-10-17 13:45:39 +00006526; BROADWELL-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006527; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006528; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006529; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6530; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006531;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006532; SKYLAKE-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006533; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006534; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6535; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6536; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006537;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006538; SKX-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006539; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006540; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6541; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6542; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006543;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006544; ZNVER1-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006545; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006546; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6547; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6548; ZNVER1-NEXT: retq # sched: [1:0.50]
6549 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1)
6550 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6551 %3 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %1, <16 x i16> %2)
6552 ret <16 x i16> %3
6553}
6554declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
6555
; test_psubusb -- two chained @llvm.x86.avx2.psubus.b calls on <32 x i8>. The
; first takes %a1 directly and the second takes a vector loaded from %a2
; (align 32), so the checks below show vpsubusb with a ymm register operand and
; then with a (%rdi) memory operand, each with its per-CPU sched annotation.
; The check lines are generated output; regenerate rather than hand-edit.
6556define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6557; GENERIC-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006558; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006559; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6560; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6561; GENERIC-NEXT: retq # sched: [1:1.00]
6562;
6563; HASWELL-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006564; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006565; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006566; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6567; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006568;
Gadi Haber85d99b42017-10-17 13:45:39 +00006569; BROADWELL-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006570; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006571; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006572; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6573; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006574;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006575; SKYLAKE-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006576; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006577; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6578; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6579; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006580;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006581; SKX-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006582; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006583; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6584; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6585; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006586;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006587; ZNVER1-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006588; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006589; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6590; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6591; ZNVER1-NEXT: retq # sched: [1:0.50]
6592 %1 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1)
6593 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6594 %3 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %1, <32 x i8> %2)
6595 ret <32 x i8> %3
6596}
6597declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
6598
; test_psubusw -- two chained @llvm.x86.avx2.psubus.w calls on <16 x i16>. The
; first takes %a1 directly and the second takes a vector loaded from %a2
; (align 32), so the checks below show vpsubusw with a ymm register operand and
; then with a (%rdi) memory operand, each with its per-CPU sched annotation.
; The check lines are generated output; regenerate rather than hand-edit.
6599define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6600; GENERIC-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006601; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006602; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6603; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6604; GENERIC-NEXT: retq # sched: [1:1.00]
6605;
6606; HASWELL-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006607; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006608; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006609; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6610; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006611;
Gadi Haber85d99b42017-10-17 13:45:39 +00006612; BROADWELL-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006613; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006614; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006615; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6616; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006617;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006618; SKYLAKE-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006619; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006620; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6621; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6622; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006623;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006624; SKX-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006625; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006626; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6627; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6628; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006629;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006630; ZNVER1-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006631; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006632; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6633; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6634; ZNVER1-NEXT: retq # sched: [1:0.50]
6635 %1 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1)
6636 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6637 %3 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %1, <16 x i16> %2)
6638 ret <16 x i16> %3
6639}
6640declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
6641
; test_psubw -- two chained generic `sub <16 x i16>` instructions: %a0 - %a1,
; then that result minus a vector loaded from %a2 (align 32). The checks below
; show vpsubw with a ymm register operand and then with a (%rdi) memory
; operand, each with its per-CPU sched annotation.
; The check lines are generated output; regenerate rather than hand-edit.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006642define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006643; GENERIC-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006644; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00006645; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6646; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6647; GENERIC-NEXT: retq # sched: [1:1.00]
6648;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006649; HASWELL-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006650; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006651; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006652; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6653; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006654;
Gadi Haber85d99b42017-10-17 13:45:39 +00006655; BROADWELL-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006656; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006657; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006658; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6659; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006660;
Gadi Haber767d98b2017-08-30 08:08:50 +00006661; SKYLAKE-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006662; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006663; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6664; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6665; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006666;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006667; SKX-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006668; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006669; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6670; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6671; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006672;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006673; ZNVER1-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006674; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00006675; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6676; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006677; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006678 %1 = sub <16 x i16> %a0, %a1
6679 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6680 %3 = sub <16 x i16> %1, %2
6681 ret <16 x i16> %3
6682}
6683
; test_punpckhbw - scheduling-annotation test for the 256-bit vpunpckhbw.
; The first shufflevector interleaves bytes 8-15 and 24-31 (the upper half of
; each 128-bit lane) of %a0 with the same positions of %a1; the second repeats
; the interleave between that result and a vector loaded from %a2.
; For every CPU prefix the assertions below expect a register-form vpunpckhbw,
; a memory-form vpunpckhbw and retq, each carrying a "sched" annotation.
; NOTE(review): the annotation is presumably [latency:reciprocal throughput]
; as printed by -print-schedule - confirm before relying on it.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006684define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6685; GENERIC-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006686; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006687; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6688; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [5:1.00]
6689; GENERIC-NEXT: retq # sched: [1:1.00]
6690;
6691; HASWELL-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006692; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006693; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006694; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6695; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006696;
Gadi Haber85d99b42017-10-17 13:45:39 +00006697; BROADWELL-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006698; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006699; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006700; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00]
6701; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006702;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006703; SKYLAKE-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006704; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006705; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006706; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6707; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006708;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006709; SKX-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006710; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006711; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006712; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6713; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006714;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006715; ZNVER1-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006716; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006717; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25]
6718; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50]
6719; ZNVER1-NEXT: retq # sched: [1:0.50]
6720 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
6721 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6722 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
6723 ret <32 x i8> %3
6724}
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006725
; test_punpckhdq - scheduling-annotation test for the 256-bit vpunpckhdq.
; The first shufflevector interleaves elements 2,3 and 6,7 (the upper half of
; each 128-bit lane) of %a0 with the same positions of %a1; the second repeats
; the interleave between that result and a vector loaded from %a2. A splat of
; 1 is then added to the result.
; For every CPU prefix the assertions below expect a register-form vpunpckhdq,
; a memory-form vpunpckhdq, then vpcmpeqd of %ymm1 with itself followed by
; vpsubd (the lowered form of the add), and retq.
; NOTE(review): the trailing add presumably exists to keep the shuffles in the
; integer domain so the vpunpckhdq form is selected - confirm.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006726define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6727; GENERIC-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006728; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006729; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6730; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
6731; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
6732; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6733; GENERIC-NEXT: retq # sched: [1:1.00]
6734;
6735; HASWELL-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006736; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006737; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006738; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006739; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6740; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006741; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006742;
Gadi Haber85d99b42017-10-17 13:45:39 +00006743; BROADWELL-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006744; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006745; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006746; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006747; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6748; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006749; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006750;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006751; SKYLAKE-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006752; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006753; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006754; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6755; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6756; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6757; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006758;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006759; SKX-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006760; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006761; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006762; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6763; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6764; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6765; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006766;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006767; ZNVER1-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006768; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006769; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25]
6770; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
6771; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6772; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6773; ZNVER1-NEXT: retq # sched: [1:0.50]
6774 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
6775 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6776 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
6777 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
6778 ret <8 x i32> %4
6779}
6780
; test_punpckhqdq - scheduling-annotation test for the 256-bit vpunpckhqdq.
; Two independent shufflevectors both take %a0 as their first operand and
; interleave its elements 1 and 3 (the upper element of each 128-bit lane)
; with %a1 and with a vector loaded from %a2 respectively; the two results
; are then added together.
; For every CPU prefix the assertions below expect a register-form
; vpunpckhqdq writing ymm1, a memory-form vpunpckhqdq writing ymm0, a vpaddq
; combining them, and retq.
; NOTE(review): the add presumably exists to keep the shuffles in the integer
; domain so the vpunpckhqdq form is selected - confirm.
6781define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6782; GENERIC-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006783; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006784; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6785; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [5:1.00]
6786; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
6787; GENERIC-NEXT: retq # sched: [1:1.00]
6788;
6789; HASWELL-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006790; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006791; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006792; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006793; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006794; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006795;
Gadi Haber85d99b42017-10-17 13:45:39 +00006796; BROADWELL-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006797; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006798; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006799; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006800; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006801; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006802;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006803; SKYLAKE-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006804; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006805; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006806; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6807; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6808; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006809;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006810; SKX-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006811; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006812; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006813; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6814; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6815; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006816;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006817; ZNVER1-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006818; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006819; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25]
6820; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50]
6821; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
6822; ZNVER1-NEXT: retq # sched: [1:0.50]
6823 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6824 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6825 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6826 %4 = add <4 x i64> %1, %3
6827 ret <4 x i64> %4
6828}
6829
; test_punpckhwd - scheduling-annotation test for the 256-bit vpunpckhwd.
; The first shufflevector interleaves elements 4-7 and 12-15 (the upper half
; of each 128-bit lane) of %a0 with the same positions of %a1; the second
; repeats the interleave between that result and a vector loaded from %a2.
; For every CPU prefix the assertions below expect a register-form vpunpckhwd,
; a memory-form vpunpckhwd and retq, each carrying a "sched" annotation.
6830define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6831; GENERIC-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006832; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006833; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6834; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [5:1.00]
6835; GENERIC-NEXT: retq # sched: [1:1.00]
6836;
6837; HASWELL-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006838; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006839; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006840; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6841; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006842;
Gadi Haber85d99b42017-10-17 13:45:39 +00006843; BROADWELL-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006844; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006845; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006846; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00]
6847; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006848;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006849; SKYLAKE-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006850; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006851; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006852; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6853; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006854;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006855; SKX-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006856; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006857; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006858; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6859; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006860;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006861; ZNVER1-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006862; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006863; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25]
6864; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50]
6865; ZNVER1-NEXT: retq # sched: [1:0.50]
6866 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
6867 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6868 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
6869 ret <16 x i16> %3
6870}
6871
; test_punpcklbw - scheduling-annotation test for the 256-bit vpunpcklbw.
; The first shufflevector interleaves bytes 0-7 and 16-23 (the lower half of
; each 128-bit lane) of %a0 with the same positions of %a1; the second repeats
; the interleave between that result and a vector loaded from %a2.
; For every CPU prefix the assertions below expect a register-form vpunpcklbw,
; a memory-form vpunpcklbw and retq, each carrying a "sched" annotation.
6872define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6873; GENERIC-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006874; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006875; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6876; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [5:1.00]
6877; GENERIC-NEXT: retq # sched: [1:1.00]
6878;
6879; HASWELL-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006880; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006881; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006882; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6883; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006884;
Gadi Haber85d99b42017-10-17 13:45:39 +00006885; BROADWELL-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006886; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006887; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006888; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00]
6889; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006890;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006891; SKYLAKE-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006892; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006893; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006894; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6895; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006896;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006897; SKX-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006898; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006899; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006900; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6901; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006902;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006903; ZNVER1-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006904; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006905; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25]
6906; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50]
6907; ZNVER1-NEXT: retq # sched: [1:0.50]
6908 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
6909 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6910 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
6911 ret <32 x i8> %3
6912}
6913
; test_punpckldq - scheduling-annotation test for the 256-bit vpunpckldq.
; The first shufflevector interleaves elements 0,1 and 4,5 (the lower half of
; each 128-bit lane) of %a0 with the same positions of %a1; the second repeats
; the interleave between that result and a vector loaded from %a2. A splat of
; 1 is then added to the result.
; For every CPU prefix the assertions below expect a register-form vpunpckldq,
; a memory-form vpunpckldq, then vpcmpeqd of %ymm1 with itself followed by
; vpsubd (the lowered form of the add), and retq.
; NOTE(review): the trailing add presumably exists to keep the shuffles in the
; integer domain so the vpunpckldq form is selected - confirm.
6914define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6915; GENERIC-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006916; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006917; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6918; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00]
6919; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
6920; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6921; GENERIC-NEXT: retq # sched: [1:1.00]
6922;
6923; HASWELL-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006924; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006925; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006926; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006927; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6928; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006929; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006930;
Gadi Haber85d99b42017-10-17 13:45:39 +00006931; BROADWELL-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006932; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006933; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006934; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006935; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6936; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006937; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006938;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006939; SKYLAKE-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006940; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006941; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006942; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6943; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6944; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6945; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006946;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006947; SKX-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006948; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006949; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006950; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6951; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6952; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6953; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006954;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006955; ZNVER1-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006956; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006957; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25]
6958; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
6959; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6960; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6961; ZNVER1-NEXT: retq # sched: [1:0.50]
6962 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
6963 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6964 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
6965 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
6966 ret <8 x i32> %4
6967}
6968
; test_punpcklqdq - scheduling-annotation test for the 256-bit vpunpcklqdq
; instruction in both its register-register and memory-folded forms.
; The assertion lines for each prefix (GENERIC, HASWELL, BROADWELL, SKYLAKE,
; SKX, ZNVER1) correspond to the -mcpu configurations at the top of the file
; and are autogenerated by utils/update_llc_test_checks.py; regenerate them
; with that script instead of editing them by hand.
6969define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6970; GENERIC-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006971; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006972; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6973; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [5:1.00]
6974; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
6975; GENERIC-NEXT: retq # sched: [1:1.00]
6976;
6977; HASWELL-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006978; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006979; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006980; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006981; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006982; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006983;
Gadi Haber85d99b42017-10-17 13:45:39 +00006984; BROADWELL-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006985; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006986; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006987; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006988; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006989; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006990;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006991; SKYLAKE-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006992; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006993; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006994; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6995; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6996; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006997;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006998; SKX-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006999; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007000; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00007001; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
7002; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
7003; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007004;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007005; ZNVER1-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007006; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007007; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25]
7008; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50]
7009; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
7010; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Mask <0,4,2,6> interleaves elements 0 and 2 of the first operand with
  ; elements 0 and 2 of the second operand (register-register form).
7011 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; Same mask, but the second operand is loaded from %a2 (memory-folded form
  ; in the expected assembly). Note that the first operand is %a0 again, not %1.
7012 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
7013 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; The add consumes both shuffle results.
  ; NOTE(review): presumably this keeps both unpacks live and in the integer
  ; domain - confirm before changing the IR.
7014 %4 = add <4 x i64> %1, %3
7015 ret <4 x i64> %4
7016}
7017
; test_punpcklwd - scheduling-annotation test for the 256-bit vpunpcklwd
; instruction in both its register-register and memory-folded forms.
; The assertion lines for each prefix (GENERIC, HASWELL, BROADWELL, SKYLAKE,
; SKX, ZNVER1) correspond to the -mcpu configurations at the top of the file
; and are autogenerated by utils/update_llc_test_checks.py; regenerate them
; with that script instead of editing them by hand.
7018define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
7019; GENERIC-LABEL: test_punpcklwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007020; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007021; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7022; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [5:1.00]
7023; GENERIC-NEXT: retq # sched: [1:1.00]
7024;
7025; HASWELL-LABEL: test_punpcklwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007026; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007027; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00007028; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7029; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007030;
Gadi Haber85d99b42017-10-17 13:45:39 +00007031; BROADWELL-LABEL: test_punpcklwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007032; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00007033; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00007034; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00]
7035; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00007036;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007037; SKYLAKE-LABEL: test_punpcklwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007038; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007039; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00007040; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7041; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007042;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007043; SKX-LABEL: test_punpcklwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007044; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007045; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00007046; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7047; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007048;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007049; ZNVER1-LABEL: test_punpcklwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007050; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00007051; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25]
7052; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50]
7053; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; The mask interleaves elements 0-3 and 8-11 of the first operand with the
  ; same-numbered elements of the second operand (indices 16-19 and 24-27),
  ; i.e. the low four i16 values of each 8-element half.
7054 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
  ; Second unpack chains on %1 and takes its other operand from memory, which
  ; yields the memory-folded form in the expected assembly. No trailing add is
  ; needed because the first shuffle already feeds the second.
7055 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
7056 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
7057 ret <16 x i16> %3
7058}
7059
; test_pxor - scheduling-annotation test for the 256-bit vpxor instruction in
; both its register-register and memory-operand forms.
; The assertion lines for each prefix (GENERIC, HASWELL, BROADWELL, SKYLAKE,
; SKX, ZNVER1) correspond to the -mcpu configurations at the top of the file
; and are autogenerated by utils/update_llc_test_checks.py; regenerate them
; with that script instead of editing them by hand.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007060define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00007061; GENERIC-LABEL: test_pxor:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007062; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00007063; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
7064; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
7065; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
7066; GENERIC-NEXT: retq # sched: [1:1.00]
7067;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007068; HASWELL-LABEL: test_pxor:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007069; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007070; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00007071; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007072; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00007073; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007074;
Gadi Haber85d99b42017-10-17 13:45:39 +00007075; BROADWELL-LABEL: test_pxor:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007076; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00007077; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00007078; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00007079; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00007080; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00007081;
Gadi Haber767d98b2017-08-30 08:08:50 +00007082; SKYLAKE-LABEL: test_pxor:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007083; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00007084; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7085; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7086; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7087; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00007088;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007089; SKX-LABEL: test_pxor:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007090; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00007091; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7092; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7093; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7094; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00007095;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007096; ZNVER1-LABEL: test_pxor:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00007097; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00007098; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
7099; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7100; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00007101; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-register xor of the two vector arguments.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007102 %1 = xor <4 x i64> %a0, %a1
  ; Second xor takes its operand from the vector loaded through %a2; the
  ; expected assembly shows it as a (%rdi) memory operand.
7103 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
7104 %3 = xor <4 x i64> %1, %2
  ; Trailing integer add of %a1.
  ; NOTE(review): presumably this keeps the xors in the integer domain so that
  ; vpxor (not a floating-point xor) is emitted - confirm before changing it.
7105 %4 = add <4 x i64> %3, %a1
7106 ret <4 x i64> %4
7107}
7108
; Metadata node wrapping the single constant i32 1. None of the three test
; functions directly above reference it.
; NOTE(review): presumably it is attached to instructions earlier in the file
; (e.g. as a !nontemporal operand) - confirm before removing or renumbering.
7109!0 = !{i32 1}