blob: f85cec97bfbf4f52f56adf6cc84a1071051d7189 [file] [log] [blame]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
Simon Pilgrim84846982017-08-01 15:14:35 +00002; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
Simon Pilgrim946f08c2017-05-06 13:46:09 +00003; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
Gadi Haber85d99b42017-10-17 13:45:39 +00004; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
Gadi Haber767d98b2017-08-30 08:08:50 +00005; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
Simon Pilgrim946f08c2017-05-06 13:46:09 +00007; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
8
; test_broadcasti128: loads a <4 x i32> from %a1, repeats it into both 128-bit
; halves of an <8 x i32> (shuffle mask 0,1,2,3,0,1,2,3) and adds %a0 to it.
; The autogenerated assertions expect a vbroadcasti128 from memory feeding a
; vpaddd for every CPU model; only the trailing "sched" annotation differs.
; NOTE(review): the sched pair is presumably [latency:reciprocal throughput]
; as emitted by -print-schedule -- confirm against the llc documentation.
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00009define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
10; GENERIC-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000011; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000012; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]
13; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
14; GENERIC-NEXT: retq # sched: [1:1.00]
15;
16; HASWELL-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000017; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +000018; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000019; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +000020; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000021;
Gadi Haber85d99b42017-10-17 13:45:39 +000022; BROADWELL-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000023; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +000024; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +000025; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +000026; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +000027;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000028; SKYLAKE-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000029; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +000030; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
31; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
32; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000033;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000034; SKX-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000035; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +000036; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
37; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
38; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000039;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000040; ZNVER1-LABEL: test_broadcasti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000041; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +000042; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50]
43; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
44; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: 16-byte-aligned load, 128-bit splat shuffle, integer add.
45 %1 = load <4 x i32>, <4 x i32> *%a1, align 16
46 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
47 %3 = add <8 x i32> %2, %a0
48 ret <8 x i32> %3
49}
50
; test_broadcastsd_ymm: splats element 0 of the <2 x double> argument across a
; <4 x double> (zeroinitializer shuffle mask) and adds the splat to itself.
; The autogenerated assertions expect vbroadcastsd %xmm0 -> %ymm0 followed by
; vaddpd for every CPU model, each with its own "sched" annotation.
; NOTE(review): the self-add presumably keeps the splat in the floating-point
; domain so that vbroadcastsd is selected -- confirm.
; NOTE(review): the znver1 vbroadcastsd latency of 100 looks like a placeholder
; rather than a measured value -- confirm against the scheduler model.
Simon Pilgrim5a931c62017-09-12 11:17:01 +000051define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) {
52; GENERIC-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000053; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000054; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00]
55; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
56; GENERIC-NEXT: retq # sched: [1:1.00]
57;
58; HASWELL-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000059; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000060; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
61; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +000062; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +000063;
Gadi Haber85d99b42017-10-17 13:45:39 +000064; BROADWELL-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000065; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +000066; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
67; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +000068; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +000069;
Simon Pilgrim5a931c62017-09-12 11:17:01 +000070; SKYLAKE-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000071; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000072; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +000073; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +000074; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +000075;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000076; SKX-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000077; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000078; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +000079; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
80; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +000081;
Simon Pilgrim5a931c62017-09-12 11:17:01 +000082; ZNVER1-LABEL: test_broadcastsd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000083; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000084; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25]
85; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
86; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: an all-zero shuffle mask selects element 0 for every lane.
87 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
88 %2 = fadd <4 x double> %1, %1
89 ret <4 x double> %2
90}
91
; test_broadcastss: splats element 0 of the <4 x float> argument across all
; four lanes (zeroinitializer shuffle mask) and adds the splat to itself.
; The autogenerated assertions expect the 128-bit register form
; vbroadcastss %xmm0 -> %xmm0 followed by vaddps for every CPU model.
; NOTE(review): the self-add presumably keeps the splat in the floating-point
; domain so that vbroadcastss is selected -- confirm.
92define <4 x float> @test_broadcastss(<4 x float> %a0) {
93; GENERIC-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000094; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +000095; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
96; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
97; GENERIC-NEXT: retq # sched: [1:1.00]
98;
99; HASWELL-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000100; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000101; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
102; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000103; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000104;
Gadi Haber85d99b42017-10-17 13:45:39 +0000105; BROADWELL-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000106; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000107; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
108; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000109; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000110;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000111; SKYLAKE-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000112; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000113; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000114; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000115; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000116;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000117; SKX-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000118; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000119; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000120; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
121; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000122;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000123; ZNVER1-LABEL: test_broadcastss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000124; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000125; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50]
126; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
127; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: an all-zero shuffle mask selects element 0 for every lane.
128 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
129 %2 = fadd <4 x float> %1, %1
130 ret <4 x float> %2
131}
132
; test_broadcastss_ymm: splats element 0 of the <4 x float> argument across an
; <8 x float> (zeroinitializer shuffle mask) and adds the splat to itself.
; The autogenerated assertions expect the 256-bit form
; vbroadcastss %xmm0 -> %ymm0 followed by vaddps for every CPU model.
; NOTE(review): the znver1 vbroadcastss latency of 100 looks like a placeholder
; rather than a measured value -- confirm against the scheduler model.
133define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) {
134; GENERIC-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000135; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000136; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00]
137; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
138; GENERIC-NEXT: retq # sched: [1:1.00]
139;
140; HASWELL-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000141; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000142; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
143; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000144; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000145;
Gadi Haber85d99b42017-10-17 13:45:39 +0000146; BROADWELL-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000147; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000148; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
149; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000150; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000151;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000152; SKYLAKE-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000153; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000154; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000155; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000156; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000157;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000158; SKX-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000159; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000160; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000161; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
162; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000163;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000164; ZNVER1-LABEL: test_broadcastss_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000165; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000166; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25]
167; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
168; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: an all-zero shuffle mask selects element 0 for every lane.
169 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
170 %2 = fadd <8 x float> %1, %1
171 ret <8 x float> %2
172}
173
; test_extracti128: computes %a0 + %a1 and %a0 - %a1 on <8 x i32>, extracts the
; upper four elements (indices 4..7) of each result, stores the upper half of
; the sum through %a2 and returns the upper half of the difference.
; The autogenerated assertions therefore expect both forms of vextracti128 $1:
; register-to-register for the returned value and register-to-memory (%rdi)
; for the store, followed by vzeroupper before the return.
; NOTE(review): the generic and znver1 vzeroupper annotations (100:0.33 and
; 100:?) look like placeholders for missing model data -- confirm.
174define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) {
175; GENERIC-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000176; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000177; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [3:1.00]
178; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
179; GENERIC-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [1:1.00]
180; GENERIC-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
Simon Pilgrim4ff43d82017-12-10 13:41:29 +0000181; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000182; GENERIC-NEXT: retq # sched: [1:1.00]
183;
184; HASWELL-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000185; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000186; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
187; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
188; HASWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
189; HASWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
190; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000191; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000192;
Gadi Haber85d99b42017-10-17 13:45:39 +0000193; BROADWELL-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000194; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000195; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
196; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
197; BROADWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
198; BROADWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
199; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000200; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000201;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000202; SKYLAKE-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000203; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000204; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
205; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000206; SKYLAKE-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
207; SKYLAKE-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
208; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000209; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000210;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000211; SKX-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000212; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000213; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
214; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000215; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
216; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
217; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000218; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000219;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000220; ZNVER1-LABEL: test_extracti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000221; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000222; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.25]
223; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
224; ZNVER1-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [2:0.25]
225; ZNVER1-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:0.50]
226; ZNVER1-NEXT: vzeroupper # sched: [100:?]
227; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: two arithmetic results, each narrowed to its upper 128 bits;
; one is stored and the other returned so both extract forms are emitted.
228 %1 = add <8 x i32> %a0, %a1
229 %2 = sub <8 x i32> %a0, %a1
230 %3 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
231 %4 = shufflevector <8 x i32> %2, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
232 store <4 x i32> %3, <4 x i32> *%a2
233 ret <4 x i32> %4
234}
235
; test_gatherdpd: forwards its four arguments, plus the constant i8 2, to the
; llvm.x86.avx2.gather.d.pd intrinsic and returns the <2 x double> result.
; The autogenerated assertions expect a single vgatherdpd with %xmm2 as the
; first operand, memory operand (%rdi,%xmm1,2) and destination %xmm0, i.e. the
; trailing i8 2 shows up as the scale of the memory operand.
; NOTE(review): the znver1 annotation 100:? looks like a placeholder for
; missing model data -- confirm.
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000236define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
237; GENERIC-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000238; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000239; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000240; GENERIC-NEXT: retq # sched: [1:1.00]
241;
242; HASWELL-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000243; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000244; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
245; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000246;
Gadi Haber85d99b42017-10-17 13:45:39 +0000247; BROADWELL-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000248; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000249; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
250; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000251;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000252; SKYLAKE-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000253; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000254; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
255; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000256;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000257; SKX-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000258; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000259; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
260; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000261;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000262; ZNVER1-LABEL: test_gatherdpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000263; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000264; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
265; ZNVER1-NEXT: retq # sched: [1:0.50]
266 %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2)
267 ret <2 x double> %1
268}
; Declaration of the intrinsic called by test_gatherdpd.
269declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly
270
; test_gatherdpd_ymm: forwards its four arguments, plus the constant i8 8, to
; the llvm.x86.avx2.gather.d.pd.256 intrinsic and returns the <4 x double>.
; The autogenerated assertions expect a single vgatherdpd with %ymm2 as the
; first operand, memory operand (%rdi,%xmm1,8) and destination %ymm0; the index
; register stays 128-bit (%xmm1) because the index argument is <4 x i32>.
; NOTE(review): the znver1 annotation 100:? looks like a placeholder for
; missing model data -- confirm.
271define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
272; GENERIC-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000273; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000274; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000275; GENERIC-NEXT: retq # sched: [1:1.00]
276;
277; HASWELL-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000278; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000279; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [27:4.00]
280; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000281;
Gadi Haber85d99b42017-10-17 13:45:39 +0000282; BROADWELL-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000283; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000284; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00]
285; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000286;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000287; SKYLAKE-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000288; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000289; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
290; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000291;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000292; SKX-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000293; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000294; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
295; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000296;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000297; ZNVER1-LABEL: test_gatherdpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000298; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000299; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:?]
300; ZNVER1-NEXT: retq # sched: [1:0.50]
301 %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8)
302 ret <4 x double> %1
303}
; Declaration of the intrinsic called by test_gatherdpd_ymm.
304declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly
305
; test_gatherdps: forwards its four arguments, plus the constant i8 2, to the
; llvm.x86.avx2.gather.d.ps intrinsic and returns the <4 x float> result.
; The autogenerated assertions expect a single vgatherdps with %xmm2 as the
; first operand, memory operand (%rdi,%xmm1,2) and destination %xmm0.
; NOTE(review): the znver1 annotation 100:? looks like a placeholder for
; missing model data -- confirm.
306define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
307; GENERIC-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000308; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000309; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000310; GENERIC-NEXT: retq # sched: [1:1.00]
311;
312; HASWELL-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000313; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000314; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67]
315; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000316;
Gadi Haber85d99b42017-10-17 13:45:39 +0000317; BROADWELL-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000318; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000319; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
320; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000321;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000322; SKYLAKE-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000323; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000324; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
325; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000326;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000327; SKX-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000328; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000329; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
330; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000331;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000332; ZNVER1-LABEL: test_gatherdps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000333; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000334; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
335; ZNVER1-NEXT: retq # sched: [1:0.50]
336 %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2)
337 ret <4 x float> %1
338}
; Declaration of the intrinsic called by test_gatherdps.
339declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly
340
; test_gatherdps_ymm: forwards its four arguments, plus the constant i8 4, to
; the llvm.x86.avx2.gather.d.ps.256 intrinsic and returns the <8 x float>.
; The autogenerated assertions expect a single vgatherdps with %ymm2 as the
; first operand, memory operand (%rdi,%ymm1,4) and destination %ymm0; here the
; index register is 256-bit because the index argument is <8 x i32>.
; NOTE(review): the znver1 annotation 100:? looks like a placeholder for
; missing model data -- confirm.
341define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
342; GENERIC-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000343; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000344; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000345; GENERIC-NEXT: retq # sched: [1:1.00]
346;
347; HASWELL-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000348; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000349; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [27:6.50]
350; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000351;
Gadi Haber85d99b42017-10-17 13:45:39 +0000352; BROADWELL-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000353; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000354; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00]
355; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000356;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000357; SKYLAKE-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000358; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000359; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
360; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000361;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000362; SKX-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000363; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000364; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
365; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000366;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000367; ZNVER1-LABEL: test_gatherdps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000368; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000369; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:?]
370; ZNVER1-NEXT: retq # sched: [1:0.50]
371 %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4)
372 ret <8 x float> %1
373}
; Declaration of the intrinsic called by test_gatherdps_ymm.
374declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly
375
; test_gatherqpd: forwards its four arguments, plus the constant i8 2, to the
; llvm.x86.avx2.gather.q.pd intrinsic and returns the <2 x double> result.
; Unlike the gather.d variants the index argument is <2 x i64>.
; The autogenerated assertions expect a single vgatherqpd with %xmm2 as the
; first operand, memory operand (%rdi,%xmm1,2) and destination %xmm0.
; NOTE(review): the znver1 annotation 100:? looks like a placeholder for
; missing model data -- confirm.
376define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
377; GENERIC-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000378; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000379; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000380; GENERIC-NEXT: retq # sched: [1:1.00]
381;
382; HASWELL-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000383; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000384; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33]
385; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000386;
Gadi Haber85d99b42017-10-17 13:45:39 +0000387; BROADWELL-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000388; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000389; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00]
390; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000391;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000392; SKYLAKE-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000393; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000394; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
395; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000396;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000397; SKX-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000398; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000399; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
400; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000401;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000402; ZNVER1-LABEL: test_gatherqpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000403; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000404; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
405; ZNVER1-NEXT: retq # sched: [1:0.50]
406 %1 = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2)
407 ret <2 x double> %1
408}
; Declaration of the intrinsic called by test_gatherqpd.
409declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly
410
; test_gatherqpd_ymm: forwards its four arguments, plus the constant i8 8, to
; the llvm.x86.avx2.gather.q.pd.256 intrinsic and returns the <4 x double>.
; The index argument is <4 x i64>, so the expected memory operand uses a
; 256-bit index register.
; The autogenerated assertions expect a single vgatherqpd with %ymm2 as the
; first operand, memory operand (%rdi,%ymm1,8) and destination %ymm0.
; NOTE(review): the znver1 annotation 100:? looks like a placeholder for
; missing model data -- confirm.
411define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
412; GENERIC-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000413; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000414; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000415; GENERIC-NEXT: retq # sched: [1:1.00]
416;
417; HASWELL-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000418; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000419; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [24:5.00]
420; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000421;
Gadi Haber85d99b42017-10-17 13:45:39 +0000422; BROADWELL-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000423; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000424; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00]
425; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000426;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000427; SKYLAKE-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000428; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000429; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
430; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000431;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000432; SKX-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000433; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000434; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
435; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000436;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000437; ZNVER1-LABEL: test_gatherqpd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000438; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000439; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:?]
440; ZNVER1-NEXT: retq # sched: [1:0.50]
441 %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8)
442 ret <4 x double> %1
443}
; Declaration of the intrinsic called by test_gatherqpd_ymm.
444declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly
445
; test_gatherqps: forwards its four arguments, plus the constant i8 2, to the
; llvm.x86.avx2.gather.q.ps intrinsic and returns the <4 x float> result.
; The index argument is <2 x i64> while source, mask-position argument and
; result are all <4 x float>.
; The autogenerated assertions expect a single vgatherqps with %xmm2 as the
; first operand, memory operand (%rdi,%xmm1,2) and destination %xmm0.
; NOTE(review): the znver1 annotation 100:? looks like a placeholder for
; missing model data -- confirm.
446define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
447; GENERIC-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000448; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000449; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000450; GENERIC-NEXT: retq # sched: [1:1.00]
451;
452; HASWELL-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000453; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000454; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67]
455; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000456;
Gadi Haber85d99b42017-10-17 13:45:39 +0000457; BROADWELL-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000458; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000459; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00]
460; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000461;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000462; SKYLAKE-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000463; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000464; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
465; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000466;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000467; SKX-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000468; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000469; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
470; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000471;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000472; ZNVER1-LABEL: test_gatherqps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000473; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000474; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
475; ZNVER1-NEXT: retq # sched: [1:0.50]
476 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2)
477 ret <4 x float> %1
478}
; Declaration of the intrinsic called by test_gatherqps.
479declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly
480
; Scheduling test for the ymm-indexed vgatherqps form. The body is a single
; call to llvm.x86.avx2.gather.q.ps.256 with a <4 x i64> index vector and a
; trailing i8 operand of 4, which appears as the scale in (%rdi,%ymm1,4).
; Every CPU section expects the gather, then vzeroupper, then retq; the
; sections differ only in the bracketed "sched" pair printed under
; -print-schedule.
; NOTE(review): the znver1 section records [100:?] for both the gather and
; vzeroupper, presumably a placeholder for missing model data - confirm.
481define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
482; GENERIC-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000483; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +0000484; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [4:0.50]
Simon Pilgrim4ff43d82017-12-10 13:41:29 +0000485; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000486; GENERIC-NEXT: retq # sched: [1:1.00]
487;
488; HASWELL-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000489; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000490; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [28:3.67]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000491; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000492; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000493;
Gadi Haber85d99b42017-10-17 13:45:39 +0000494; BROADWELL-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000495; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000496; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000497; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000498; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000499;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000500; SKYLAKE-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000501; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000502; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000503; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000504; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000505;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000506; SKX-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000507; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000508; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000509; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000510; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000511;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000512; ZNVER1-LABEL: test_gatherqps_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000513; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000514; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:?]
515; ZNVER1-NEXT: vzeroupper # sched: [100:?]
516; ZNVER1-NEXT: retq # sched: [1:0.50]
517 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4)
518 ret <4 x float> %1
519}
520declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly
521
; Scheduling test for vinserti128 in its register and memory forms. No
; intrinsic is used: each insert is written as a pair of shufflevectors. The
; first widens a <4 x i32> to eight lanes (upper four undef); the second keeps
; lanes 0-3 of %a0 and takes lanes 8-11, i.e. the low four lanes of the
; widened value. This is done once with %a1 and once with a 16-byte-aligned
; load from %a2, and the two results are added so both inserts stay live.
; Every CPU section expects the register-form insert, the memory-form insert,
; vpaddd and retq; only the bracketed "sched" pairs differ.
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000522define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
523; GENERIC-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000524; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000525; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
526; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
527; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
528; GENERIC-NEXT: retq # sched: [1:1.00]
529;
530; HASWELL-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000531; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000532; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000533; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000534; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000535; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000536;
Gadi Haber85d99b42017-10-17 13:45:39 +0000537; BROADWELL-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000538; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000539; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000540; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000541; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000542; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000543;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000544; SKYLAKE-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000545; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000546; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000547; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
548; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
549; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000550;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000551; SKX-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000552; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000553; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000554; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
555; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
556; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000557;
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000558; ZNVER1-LABEL: test_inserti128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000559; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +0000560; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25]
561; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
562; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
563; ZNVER1-NEXT: retq # sched: [1:0.50]
564 %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
565 %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
566 %3 = load <4 x i32>, <4 x i32> *%a2, align 16
567 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
568 %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
569 %6 = add <8 x i32> %2, %5
570 ret <8 x i32> %6
571}
572
; Scheduling test for vmovntdqa with a ymm destination. The body is a single
; call to llvm.x86.avx2.movntdqa on the pointer argument %a0, returned
; directly. Every CPU section expects "vmovntdqa (%rdi), %ymm0" followed by
; retq; only the bracketed "sched" pairs printed under -print-schedule differ.
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000573define <4 x i64> @test_movntdqa(i8* %a0) {
574; GENERIC-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000575; GENERIC: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000576; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [4:0.50]
577; GENERIC-NEXT: retq # sched: [1:1.00]
578;
579; HASWELL-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000580; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +0000581; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
582; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000583;
Gadi Haber85d99b42017-10-17 13:45:39 +0000584; BROADWELL-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000585; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +0000586; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50]
587; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000588;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000589; SKYLAKE-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000590; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000591; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
592; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000593;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000594; SKX-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000595; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000596; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
597; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000598;
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000599; ZNVER1-LABEL: test_movntdqa:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000600; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +0000601; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50]
602; ZNVER1-NEXT: retq # sched: [1:0.50]
603 %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0)
604 ret <4 x i64> %1
605}
606declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
607
; Scheduling test for vmpsadbw in its register and memory forms. The first
; llvm.x86.avx2.mpsadbw call uses %a0 and %a1 with immediate 7; its result is
; bitcast back to <32 x i8> and chained into a second call whose other operand
; is a 32-byte-aligned load from %a2, again with immediate 7.
; Every CPU section expects the register form, the memory form and retq; only
; the bracketed "sched" pairs differ.
; NOTE(review): the znver1 section records [100:?] for both vmpsadbw forms,
; presumably a placeholder for missing model data - confirm.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000608define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
609; GENERIC-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000610; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000611; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
612; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
613; GENERIC-NEXT: retq # sched: [1:1.00]
614;
615; HASWELL-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000616; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000617; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000618; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00]
619; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000620;
Gadi Haber85d99b42017-10-17 13:45:39 +0000621; BROADWELL-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000622; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000623; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000624; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
625; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000626;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000627; SKYLAKE-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000628; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +0000629; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000630; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
631; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000632;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000633; SKX-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000634; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000635; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000636; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
637; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000638;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000639; ZNVER1-LABEL: test_mpsadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000640; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000641; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:?]
642; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:?]
643; ZNVER1-NEXT: retq # sched: [1:0.50]
644 %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
645 %2 = bitcast <16 x i16> %1 to <32 x i8>
646 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
647 %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7)
648 ret <16 x i16> %4
649}
650declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
651
; Scheduling test for vpabsb in its register and memory forms. The
; llvm.x86.avx2.pabs.b intrinsic is applied once to %a0 and once to a
; 32-byte-aligned load from %a1; the two results are OR'd so both stay live.
; Each CPU section expects two vpabsb instructions, vpor and retq. The znver1
; section lists the memory-form vpabsb first, whereas the other sections list
; the register form first; the bracketed "sched" pairs also differ per CPU.
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000652define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000653; GENERIC-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000654; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000655; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00]
656; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:1.00]
657; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
658; GENERIC-NEXT: retq # sched: [1:1.00]
659;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000660; HASWELL-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000661; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000662; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000663; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000664; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000665; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000666;
Gadi Haber85d99b42017-10-17 13:45:39 +0000667; BROADWELL-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000668; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000669; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000670; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000671; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +0000672; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000673;
Gadi Haber767d98b2017-08-30 08:08:50 +0000674; SKYLAKE-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000675; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000676; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
677; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
678; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
679; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000680;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000681; SKX-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000682; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000683; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
684; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
685; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
686; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000687;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000688; ZNVER1-LABEL: test_pabsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000689; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +0000690; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
691; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
692; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000693; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000694 %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
695 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
696 %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
697 %4 = or <32 x i8> %1, %3
698 ret <32 x i8> %4
699}
700declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
701
; Scheduling test for vpabsd in its register and memory forms. The
; llvm.x86.avx2.pabs.d intrinsic is applied once to %a0 and once to a
; 32-byte-aligned load from %a1; the two results are OR'd so both stay live.
; Each CPU section expects two vpabsd instructions, vpor and retq. The znver1
; section lists the memory-form vpabsd first, whereas the other sections list
; the register form first; the bracketed "sched" pairs also differ per CPU.
702define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000703; GENERIC-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000704; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000705; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00]
706; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:1.00]
707; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
708; GENERIC-NEXT: retq # sched: [1:1.00]
709;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000710; HASWELL-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000711; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000712; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000713; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000714; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000715; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000716;
Gadi Haber85d99b42017-10-17 13:45:39 +0000717; BROADWELL-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000718; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000719; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000720; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000721; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +0000722; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000723;
Gadi Haber767d98b2017-08-30 08:08:50 +0000724; SKYLAKE-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000725; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000726; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
727; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
728; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
729; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000730;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000731; SKX-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000732; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000733; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
734; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
735; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
736; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000737;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000738; ZNVER1-LABEL: test_pabsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000739; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +0000740; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
741; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
742; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000743; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000744 %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
745 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
746 %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
747 %4 = or <8 x i32> %1, %3
748 ret <8 x i32> %4
749}
750declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
751
; Scheduling test for vpabsw in its register and memory forms. The
; llvm.x86.avx2.pabs.w intrinsic is applied once to %a0 and once to a
; 32-byte-aligned load from %a1; the two results are OR'd so both stay live.
; Each CPU section expects two vpabsw instructions, vpor and retq. The znver1
; section lists the memory-form vpabsw first, whereas the other sections list
; the register form first; the bracketed "sched" pairs also differ per CPU.
752define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
Simon Pilgrim84846982017-08-01 15:14:35 +0000753; GENERIC-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000754; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +0000755; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00]
756; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:1.00]
757; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
758; GENERIC-NEXT: retq # sched: [1:1.00]
759;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000760; HASWELL-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000761; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000762; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000763; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000764; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000765; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000766;
Gadi Haber85d99b42017-10-17 13:45:39 +0000767; BROADWELL-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000768; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000769; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +0000770; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +0000771; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +0000772; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000773;
Gadi Haber767d98b2017-08-30 08:08:50 +0000774; SKYLAKE-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000775; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000776; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
777; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
778; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
779; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +0000780;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000781; SKX-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000782; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +0000783; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
784; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
785; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
786; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000787;
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000788; ZNVER1-LABEL: test_pabsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000789; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +0000790; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
791; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
792; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +0000793; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +0000794 %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
795 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
796 %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
797 %4 = or <16 x i16> %1, %3
798 ret <16 x i16> %4
799}
800declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
801
; Scheduling test for vpackssdw in its register and memory forms. The first
; llvm.x86.avx2.packssdw call packs %a0 and %a1; its <16 x i16> result is
; bitcast to <8 x i32> and chained into a second call whose other operand is
; a 32-byte-aligned load from %a2.
; Every CPU section expects the register form, the memory form and retq; only
; the bracketed "sched" pairs printed under -print-schedule differ.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000802define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
803; GENERIC-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000804; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000805; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
806; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
807; GENERIC-NEXT: retq # sched: [1:1.00]
808;
809; HASWELL-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000810; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000811; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000812; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
813; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000814;
Gadi Haber85d99b42017-10-17 13:45:39 +0000815; BROADWELL-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000816; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000817; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000818; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
819; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000820;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000821; SKYLAKE-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000822; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000823; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000824; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
825; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000826;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000827; SKX-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000828; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000829; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000830; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
831; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000832;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000833; ZNVER1-LABEL: test_packssdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000834; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000835; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
836; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
837; ZNVER1-NEXT: retq # sched: [1:0.50]
838 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
839 %2 = bitcast <16 x i16> %1 to <8 x i32>
840 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
841 %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3)
842 ret <16 x i16> %4
843}
844declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
845
; Scheduling test for vpacksswb in its register and memory forms. The first
; llvm.x86.avx2.packsswb call packs %a0 and %a1; its <32 x i8> result is
; bitcast to <16 x i16> and chained into a second call whose other operand is
; a 32-byte-aligned load from %a2.
; Every CPU section expects the register form, the memory form and retq; only
; the bracketed "sched" pairs printed under -print-schedule differ.
846define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
847; GENERIC-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000848; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000849; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
850; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
851; GENERIC-NEXT: retq # sched: [1:1.00]
852;
853; HASWELL-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000854; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000855; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000856; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
857; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000858;
Gadi Haber85d99b42017-10-17 13:45:39 +0000859; BROADWELL-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000860; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000861; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000862; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
863; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000864;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000865; SKYLAKE-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000866; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000867; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000868; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
869; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000870;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000871; SKX-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000872; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000873; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000874; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
875; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000876;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000877; ZNVER1-LABEL: test_packsswb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000878; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000879; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
880; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
881; ZNVER1-NEXT: retq # sched: [1:0.50]
882 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
883 %2 = bitcast <32 x i8> %1 to <16 x i16>
884 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
885 %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3)
886 ret <32 x i8> %4
887}
888declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
889
; Scheduling test for vpackusdw in its register and memory forms. The first
; llvm.x86.avx2.packusdw call packs %a0 and %a1; its <16 x i16> result is
; bitcast to <8 x i32> and chained into a second call whose other operand is
; a 32-byte-aligned load from %a2.
; Every CPU section expects the register form, the memory form and retq; only
; the bracketed "sched" pairs printed under -print-schedule differ.
890define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
891; GENERIC-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000892; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000893; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
894; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
895; GENERIC-NEXT: retq # sched: [1:1.00]
896;
897; HASWELL-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000898; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000899; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +0000900; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
901; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000902;
Gadi Haber85d99b42017-10-17 13:45:39 +0000903; BROADWELL-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000904; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +0000905; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +0000906; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
907; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +0000908;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000909; SKYLAKE-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000910; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000911; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +0000912; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
913; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000914;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000915; SKX-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000916; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000917; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +0000918; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
919; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +0000920;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000921; ZNVER1-LABEL: test_packusdw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000922; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +0000923; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
924; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
925; ZNVER1-NEXT: retq # sched: [1:0.50]
926 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
927 %2 = bitcast <16 x i16> %1 to <8 x i32>
928 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
929 %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3)
930 ret <16 x i16> %4
931}
932declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
933
; Scheduling test for the ymm form of vpackuswb. The first intrinsic call packs
; the two register arguments; its result is bitcast back to <16 x i16> and
; packed again with a vector loaded from %a2, so each CPU's expected output is a
; register-register instruction followed by one with a memory operand.
define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_packuswb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_packuswb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_packuswb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_packuswb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_packuswb:
; SKX: # %bb.0:
; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_packuswb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
  %2 = bitcast <32 x i8> %1 to <16 x i16>
  %3 = load <16 x i16>, <16 x i16> *%a2, align 32
  %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3)
  ret <32 x i8> %4
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
977
; Scheduling test for the ymm form of vpaddb. The first add uses the two
; register arguments and the second adds a vector loaded from %a2, so each CPU's
; expected output is a register-register instruction followed by one with a
; memory operand.
define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_paddb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddb:
; SKX: # %bb.0:
; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_paddb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = add <32 x i8> %a0, %a1
  %2 = load <32 x i8>, <32 x i8> *%a2, align 32
  %3 = add <32 x i8> %1, %2
  ret <32 x i8> %3
}
1019
; Scheduling test for the ymm form of vpaddd. The first add uses the two
; register arguments and the second adds a vector loaded from %a2, so each CPU's
; expected output is a register-register instruction followed by one with a
; memory operand.
define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_paddd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddd:
; SKX: # %bb.0:
; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_paddd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = add <8 x i32> %a0, %a1
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = add <8 x i32> %1, %2
  ret <8 x i32> %3
}
1061
; Scheduling test for the ymm form of vpaddq. The first add uses the two
; register arguments and the second adds a vector loaded from %a2, so each CPU's
; expected output is a register-register instruction followed by one with a
; memory operand.
define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_paddq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddq:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_paddq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = add <4 x i64> %a0, %a1
  %2 = load <4 x i64>, <4 x i64> *%a2, align 32
  %3 = add <4 x i64> %1, %2
  ret <4 x i64> %3
}
1103
; Scheduling test for the ymm form of vpaddsb. The first intrinsic call uses the
; two register arguments and the second takes a vector loaded from %a2, so each
; CPU's expected output is a register-register instruction followed by one with
; a memory operand.
define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_paddsb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddsb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddsb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddsb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddsb:
; SKX: # %bb.0:
; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_paddsb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1)
  %2 = load <32 x i8>, <32 x i8> *%a2, align 32
  %3 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %1, <32 x i8> %2)
  ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
1146
; Scheduling test for the ymm form of vpaddsw. The first intrinsic call uses the
; two register arguments and the second takes a vector loaded from %a2, so each
; CPU's expected output is a register-register instruction followed by one with
; a memory operand.
define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_paddsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddsw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddsw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddsw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddsw:
; SKX: # %bb.0:
; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_paddsw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1)
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %1, <16 x i16> %2)
  ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
1189
; Scheduling test for the ymm form of vpaddusb. The first intrinsic call uses
; the two register arguments and the second takes a vector loaded from %a2, so
; each CPU's expected output is a register-register instruction followed by one
; with a memory operand.
define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_paddusb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddusb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddusb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddusb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddusb:
; SKX: # %bb.0:
; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_paddusb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1)
  %2 = load <32 x i8>, <32 x i8> *%a2, align 32
  %3 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %1, <32 x i8> %2)
  ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
1232
; Scheduling test for the ymm form of vpaddusw. The first intrinsic call uses
; the two register arguments and the second takes a vector loaded from %a2, so
; each CPU's expected output is a register-register instruction followed by one
; with a memory operand.
define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_paddusw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddusw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddusw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddusw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddusw:
; SKX: # %bb.0:
; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_paddusw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1)
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %1, <16 x i16> %2)
  ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
1275
; Scheduling test for the ymm form of vpaddw. The first add uses the two
; register arguments and the second adds a vector loaded from %a2, so each CPU's
; expected output is a register-register instruction followed by one with a
; memory operand.
define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_paddw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddw:
; SKX: # %bb.0:
; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_paddw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = add <16 x i16> %a0, %a1
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = add <16 x i16> %1, %2
  ret <16 x i16> %3
}
1317
; Scheduling test for the ymm form of vpalignr. Both shufflevector operations
; use the same mask; the first shuffles the two register arguments and the
; second shuffles a vector loaded from %a2 with the first result, so each CPU's
; expected output is a register-register vpalignr followed by one whose source
; elements come from memory.
define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_palignr:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_palignr:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_palignr:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [7:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_palignr:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_palignr:
; SKX: # %bb.0:
; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
; SKX-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_palignr:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25]
; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
  %2 = load <32 x i8>, <32 x i8> *%a2, align 32
  %3 = shufflevector <32 x i8> %2, <32 x i8> %1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
  ret <32 x i8> %3
}
1359
; test_pand: ANDs %a0 with %a1, ANDs that result with a <4 x i64> loaded from
; %a2 (align 32), then adds %a1 and returns the sum. For each of the six CPU
; prefixes the assertions expect a register-form vpand, a memory-form vpand
; and a vpaddq, each followed by its "sched" annotation.
; NOTE(review): the trailing add presumably keeps the logic ops in the integer
; domain so that vpand (not a float-domain AND) is selected -- confirm.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001360define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001361; GENERIC-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001362; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001363; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1364; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
1365; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1366; GENERIC-NEXT: retq # sched: [1:1.00]
1367;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001368; HASWELL-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001369; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001370; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001371; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001372; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001373; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001374;
Gadi Haber85d99b42017-10-17 13:45:39 +00001375; BROADWELL-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001376; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001377; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001378; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001379; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001380; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001381;
Gadi Haber767d98b2017-08-30 08:08:50 +00001382; SKYLAKE-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001383; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001384; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1385; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1386; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1387; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001388;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001389; SKX-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001390; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001391; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1392; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1393; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1394; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001395;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001396; ZNVER1-LABEL: test_pand:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001397; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00001398; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1399; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1400; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001401; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001402 %1 = and <4 x i64> %a0, %a1
1403 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
1404 %3 = and <4 x i64> %1, %2
1405 %4 = add <4 x i64> %3, %a1
1406 ret <4 x i64> %4
1407}
1408
; test_pandn: builds two and-not patterns. First %a0 is inverted (xor with
; all-ones) and ANDed with %a1; then that result is inverted and ANDed with a
; <4 x i64> loaded from %a2 (align 32). The two AND results are added and
; returned. For each CPU prefix the assertions expect a register-form vpandn,
; a memory-form vpandn and a vpaddq, each with its "sched" annotation.
; NOTE(review): the final add presumably pins the sequence to the integer
; domain so vpandn is chosen over a float-domain variant -- confirm.
1409define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00001410; GENERIC-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001411; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00001412; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1413; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:1.00]
1414; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1415; GENERIC-NEXT: retq # sched: [1:1.00]
1416;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001417; HASWELL-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001418; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001419; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001420; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001421; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001422; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001423;
Gadi Haber85d99b42017-10-17 13:45:39 +00001424; BROADWELL-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001425; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001426; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001427; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001428; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001429; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001430;
Gadi Haber767d98b2017-08-30 08:08:50 +00001431; SKYLAKE-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001432; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001433; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1434; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1435; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1436; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00001437;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001438; SKX-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001439; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001440; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1441; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1442; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1443; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001444;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001445; ZNVER1-LABEL: test_pandn:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001446; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00001447; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1448; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1449; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00001450; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00001451 %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
1452 %2 = and <4 x i64> %a1, %1
1453 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
1454 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
1455 %5 = and <4 x i64> %3, %4
1456 %6 = add <4 x i64> %2, %5
1457 ret <4 x i64> %6
1458}
1459
; test_pavgb: two chained rounding-average patterns on <32 x i8>. Each one
; zero-extends both operands to i16, adds them, adds 1, shifts right
; logically by 1 and truncates back to i8. The first averages %a0 with %a1;
; the second averages that result with a vector loaded from %a2 (align 32).
; For each CPU prefix the assertions expect a register-form vpavgb followed by
; a memory-form vpavgb, each with its "sched" annotation.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001460define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1461; GENERIC-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001462; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001463; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1464; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1465; GENERIC-NEXT: retq # sched: [1:1.00]
1466;
1467; HASWELL-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001468; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001469; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001470; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1471; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001472;
Gadi Haber85d99b42017-10-17 13:45:39 +00001473; BROADWELL-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001474; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001475; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001476; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1477; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001478;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001479; SKYLAKE-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001480; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001481; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1482; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1483; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001484;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001485; SKX-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001486; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001487; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1488; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1489; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001490;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001491; ZNVER1-LABEL: test_pavgb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001492; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001493; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1494; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1495; ZNVER1-NEXT: retq # sched: [1:0.50]
1496 %1 = zext <32 x i8> %a0 to <32 x i16>
1497 %2 = zext <32 x i8> %a1 to <32 x i16>
1498 %3 = add <32 x i16> %1, %2
1499 %4 = add <32 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1500 %5 = lshr <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1501 %6 = trunc <32 x i16> %5 to <32 x i8>
1502 %7 = load <32 x i8>, <32 x i8> *%a2, align 32
1503 %8 = zext <32 x i8> %6 to <32 x i16>
1504 %9 = zext <32 x i8> %7 to <32 x i16>
1505 %10 = add <32 x i16> %8, %9
1506 %11 = add <32 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1507 %12 = lshr <32 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1508 %13 = trunc <32 x i16> %12 to <32 x i8>
1509 ret <32 x i8> %13
1510}
1511
; test_pavgw: two chained rounding-average patterns on <16 x i16>. Each one
; zero-extends both operands to i32, adds them, adds 1, shifts right
; logically by 1 and truncates back to i16. The first averages %a0 with %a1;
; the second averages that result with a vector loaded from %a2 (align 32).
; For each CPU prefix the assertions expect a register-form vpavgw followed by
; a memory-form vpavgw, each with its "sched" annotation.
1512define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1513; GENERIC-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001514; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001515; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1516; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1517; GENERIC-NEXT: retq # sched: [1:1.00]
1518;
1519; HASWELL-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001520; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001521; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001522; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1523; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001524;
Gadi Haber85d99b42017-10-17 13:45:39 +00001525; BROADWELL-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001526; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001527; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001528; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1529; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001530;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001531; SKYLAKE-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001532; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001533; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1534; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1535; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001536;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001537; SKX-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001538; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001539; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1540; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1541; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001542;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001543; ZNVER1-LABEL: test_pavgw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001544; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001545; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1546; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1547; ZNVER1-NEXT: retq # sched: [1:0.50]
1548 %1 = zext <16 x i16> %a0 to <16 x i32>
1549 %2 = zext <16 x i16> %a1 to <16 x i32>
1550 %3 = add <16 x i32> %1, %2
1551 %4 = add <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1552 %5 = lshr <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1553 %6 = trunc <16 x i32> %5 to <16 x i16>
1554 %7 = load <16 x i16>, <16 x i16> *%a2, align 32
1555 %8 = zext <16 x i16> %6 to <16 x i32>
1556 %9 = zext <16 x i16> %7 to <16 x i32>
1557 %10 = add <16 x i32> %8, %9
1558 %11 = add <16 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1559 %12 = lshr <16 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1560 %13 = trunc <16 x i32> %12 to <16 x i16>
1561 ret <16 x i16> %13
1562}
1563
; test_pblendd: 128-bit dword blends written as shufflevectors. The first
; shuffle (mask <4,5,6,3>) takes elements 0-2 from %a1 and element 3 from
; %a0. The second (mask <4,1,6,3>) replaces elements 0 and 2 of that result
; with elements of a <4 x i32> loaded from %a2 (align 16). The blend result is
; added to %a0 and returned. For each CPU prefix the assertions expect a
; register-form vpblendd, a memory-form vpblendd and a vpaddd on xmm
; registers, each with its "sched" annotation.
; NOTE(review): the trailing add presumably keeps the blends in the integer
; domain so vpblendd is selected -- confirm.
1564define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1565; GENERIC-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001566; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001567; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1568; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [5:0.50]
1569; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1570; GENERIC-NEXT: retq # sched: [1:1.00]
1571;
1572; HASWELL-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001573; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001574; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001575; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001576; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001577; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001578;
Gadi Haber85d99b42017-10-17 13:45:39 +00001579; BROADWELL-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001580; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001581; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001582; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001583; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001584; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001585;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001586; SKYLAKE-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001587; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001588; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1589; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1590; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1591; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001592;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001593; SKX-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001594; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001595; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1596; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1597; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1598; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001599;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001600; ZNVER1-LABEL: test_pblendd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001601; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001602; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1603; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00]
1604; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1605; ZNVER1-NEXT: retq # sched: [1:0.50]
1606 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
1607 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
1608 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1609 %4 = add <4 x i32> %a0, %3
1610 ret <4 x i32> %4
1611}
1612
; test_pblendd_ymm: 256-bit dword blends written as shufflevectors. The first
; shuffle (mask <8,9,10,3,4,5,6,15>) takes elements 0-2 and 7 from %a1 and
; elements 3-6 from %a0. The second (mask <0,9,10,3,4,5,6,7>) replaces
; elements 1 and 2 of that result with elements of an <8 x i32> loaded from
; %a2 (align 32). The blend result is added to %a0 and returned. For each CPU
; prefix the assertions expect a register-form vpblendd, a memory-form
; vpblendd and a vpaddd on ymm registers, each with its "sched" annotation.
; NOTE(review): the trailing add presumably keeps the blends in the integer
; domain so vpblendd is selected -- confirm.
1613define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
1614; GENERIC-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001615; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001616; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1617; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [5:0.50]
1618; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1619; GENERIC-NEXT: retq # sched: [1:1.00]
1620;
1621; HASWELL-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001622; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001623; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001624; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001625; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001626; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001627;
Gadi Haber85d99b42017-10-17 13:45:39 +00001628; BROADWELL-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001629; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001630; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00001631; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001632; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001633; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001634;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001635; SKYLAKE-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001636; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001637; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1638; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1639; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1640; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001641;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001642; SKX-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001643; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00001644; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1645; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1646; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1647; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001648;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001649; ZNVER1-LABEL: test_pblendd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001650; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001651; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1652; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50]
1653; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1654; ZNVER1-NEXT: retq # sched: [1:0.50]
1655 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
1656 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
1657 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
1658 %4 = add <8 x i32> %a0, %3
1659 ret <8 x i32> %4
1660}
1661
; test_pblendvb: calls the @llvm.x86.avx2.pblendvb intrinsic twice. The first
; call blends %a0 and %a1 under mask %a2; the second blends that result with
; a <32 x i8> loaded from %a3 (align 32) under mask %a4. For each CPU prefix
; the assertions expect a register-form vpblendvb followed by a memory-form
; vpblendvb, each with its "sched" annotation. The intrinsic itself is
; declared immediately after the function body.
1662define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) {
1663; GENERIC-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001664; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001665; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
1666; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
1667; GENERIC-NEXT: retq # sched: [1:1.00]
1668;
1669; HASWELL-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001670; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001671; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001672; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
1673; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001674;
Gadi Haber85d99b42017-10-17 13:45:39 +00001675; BROADWELL-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001676; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001677; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001678; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
1679; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001680;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001681; SKYLAKE-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001682; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00001683; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001684; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1685; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001686;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001687; SKX-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001688; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001689; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
Gadi Haber684944b2017-10-08 12:52:54 +00001690; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1691; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001692;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001693; ZNVER1-LABEL: test_pblendvb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001694; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001695; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1696; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
1697; ZNVER1-NEXT: retq # sched: [1:0.50]
1698 %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2)
1699 %2 = load <32 x i8>, <32 x i8> *%a3, align 32
1700 %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4)
1701 ret <32 x i8> %3
1702}
1703declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
1704
; test_pblendw: 256-bit word blends written as shufflevectors. The first
; shuffle keeps %a0 except for elements 2-4 and 10-12, which come from %a1
; (mask indices 18-20 and 26-28). The second takes every even element from a
; <16 x i16> loaded from %a2 (align 32) and every odd element from the first
; result. For each CPU prefix the assertions expect a register-form vpblendw
; followed by a memory-form vpblendw, each with its "sched" annotation.
1705define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1706; GENERIC-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001707; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001708; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
1709; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [5:0.50]
1710; GENERIC-NEXT: retq # sched: [1:1.00]
1711;
1712; HASWELL-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001713; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001714; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001715; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00]
1716; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001717;
Gadi Haber85d99b42017-10-17 13:45:39 +00001718; BROADWELL-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001719; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001720; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001721; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [7:1.00]
1722; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001723;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001724; SKYLAKE-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001725; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001726; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001727; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00]
1728; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001729;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001730; SKX-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001731; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001732; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001733; SKX-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00]
1734; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001735;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001736; ZNVER1-LABEL: test_pblendw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001737; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00001738; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33]
1739; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [9:0.50]
1740; ZNVER1-NEXT: retq # sched: [1:0.50]
1741 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 28, i32 13, i32 14, i32 15>
1742 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1743 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
1744 ret <16 x i16> %3
1745}
1746
; test_pbroadcastb: 128-bit byte broadcasts written as shufflevectors with a
; zeroinitializer mask (every lane selects element 0). The first splats
; element 0 of %a0; the second splats element 0 of a <16 x i8> loaded from %a1
; (align 16). The two splats are added and returned. For each CPU prefix the
; assertions expect a register-form vpbroadcastb, a memory-form vpbroadcastb
; and a vpaddb, each with its "sched" annotation. The BROADWELL and ZNVER1
; expectations list the memory-form broadcast first; the other prefixes list
; the register form first.
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001747define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) {
1748; GENERIC-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001749; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001750; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
1751; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
1752; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1753; GENERIC-NEXT: retq # sched: [1:1.00]
1754;
1755; HASWELL-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001756; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001757; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001758; HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001759; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001760; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001761;
Gadi Haber85d99b42017-10-17 13:45:39 +00001762; BROADWELL-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001763; BROADWELL: # %bb.0:
Sanjoy Das81a4a022017-12-12 18:40:58 +00001764; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
Sanjoy Das1074eb22017-12-12 19:11:31 +00001765; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001766; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001767; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001768;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001769; SKYLAKE-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001770; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001771; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001772; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1773; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1774; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001775;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001776; SKX-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001777; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001778; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001779; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1780; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1781; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001782;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001783; ZNVER1-LABEL: test_pbroadcastb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001784; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001785; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00]
1786; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25]
1787; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1788; ZNVER1-NEXT: retq # sched: [1:0.50]
1789 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
1790 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
1791 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
1792 %4 = add <16 x i8> %1, %3
1793 ret <16 x i8> %4
1794}
1795
; Scheduling-annotation test for vpbroadcastb with a 256-bit (ymm) result.
; The IR splats element 0 of %a0 and element 0 of the <32 x i8> vector loaded
; from %a1 (shufflevector with a zeroinitializer mask), then adds the two
; splats. The expected output for every CPU in the RUN lines is one
; register-source broadcast, one memory-source broadcast and a vpaddb, each
; with its "sched: [latency:throughput]" note; only the order of the two
; broadcasts and the numbers differ between CPUs.
1796define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) {
1797; GENERIC-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001798; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001799; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
1800; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
1801; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1802; GENERIC-NEXT: retq # sched: [1:1.00]
1803;
1804; HASWELL-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001805; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001806; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001807; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001808; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001809; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001810;
Gadi Haber85d99b42017-10-17 13:45:39 +00001811; BROADWELL-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001812; BROADWELL: # %bb.0:
Sanjoy Das81a4a022017-12-12 18:40:58 +00001813; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
Sanjoy Das1074eb22017-12-12 19:11:31 +00001814; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001815; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001816; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001817;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001818; SKYLAKE-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001819; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001820; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001821; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1822; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1823; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001824;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001825; SKX-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001826; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001827; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001828; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1829; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1830; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001831;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001832; ZNVER1-LABEL: test_pbroadcastb_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001833; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001834; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00]
1835; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25]
1836; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1837; ZNVER1-NEXT: retq # sched: [1:0.50]
1838 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer
1839 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
1840 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer
1841 %4 = add <32 x i8> %1, %3
1842 ret <32 x i8> %4
1843}
1844
; Scheduling-annotation test for vpbroadcastd with a 128-bit (xmm) result.
; The IR splats element 0 of %a0 and element 0 of the <4 x i32> vector loaded
; from %a1 (shufflevector with a zeroinitializer mask), then adds the two
; splats. Most CPUs in the RUN lines are expected to emit a register-source
; broadcast, a memory-source broadcast and a vpaddd. The skx run instead
; expects the memory broadcast folded into the add as a "(%rdi){1to4}"
; operand, so it has no separate memory-source vpbroadcastd line.
1845define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) {
1846; GENERIC-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001847; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001848; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1849; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
1850; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1851; GENERIC-NEXT: retq # sched: [1:1.00]
1852;
1853; HASWELL-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001854; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001855; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001856; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001857; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001858; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001859;
Gadi Haber85d99b42017-10-17 13:45:39 +00001860; BROADWELL-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001861; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001862; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001863; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001864; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001865; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001866;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001867; SKYLAKE-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001868; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001869; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001870; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
1871; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1872; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001873;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001874; SKX-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001875; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001876; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001877; SKX-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
1878; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001879;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001880; ZNVER1-LABEL: test_pbroadcastd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001881; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001882; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50]
1883; ZNVER1-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.25]
1884; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1885; ZNVER1-NEXT: retq # sched: [1:0.50]
1886 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
1887 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
1888 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
1889 %4 = add <4 x i32> %1, %3
1890 ret <4 x i32> %4
1891}
1892
; Scheduling-annotation test for vpbroadcastd with a 256-bit (ymm) result.
; The IR splats element 0 of %a0 and element 0 of the <8 x i32> vector loaded
; from %a1 (shufflevector with a zeroinitializer mask), then adds the two
; splats. Most CPUs in the RUN lines are expected to emit a register-source
; broadcast, a memory-source broadcast and a vpaddd. The skx run instead
; expects the memory broadcast folded into the add as a "(%rdi){1to8}"
; operand, so it has no separate memory-source vpbroadcastd line.
1893define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) {
1894; GENERIC-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001895; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001896; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
1897; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
1898; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1899; GENERIC-NEXT: retq # sched: [1:1.00]
1900;
1901; HASWELL-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001902; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001903; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001904; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001905; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001906; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001907;
Gadi Haber85d99b42017-10-17 13:45:39 +00001908; BROADWELL-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001909; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001910; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001911; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001912; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001913; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001914;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001915; SKYLAKE-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001916; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001917; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001918; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
1919; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1920; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001921;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001922; SKX-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001923; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001924; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001925; SKX-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
1926; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001927;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001928; ZNVER1-LABEL: test_pbroadcastd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001929; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001930; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50]
1931; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25]
1932; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1933; ZNVER1-NEXT: retq # sched: [1:0.50]
1934 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
1935 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
1936 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer
1937 %4 = add <8 x i32> %1, %3
1938 ret <8 x i32> %4
1939}
1940
; Scheduling-annotation test for vpbroadcastq with a 128-bit (xmm) result.
; The IR splats element 0 of %a0 and element 0 of the <2 x i64> vector loaded
; from %a1 (shufflevector with a zeroinitializer mask), then adds the two
; splats. Most CPUs in the RUN lines are expected to emit a register-source
; broadcast, a memory-source broadcast and a vpaddq. The skx run instead
; expects the memory broadcast folded into the add as a "(%rdi){1to2}"
; operand, so it has no separate memory-source vpbroadcastq line.
1941define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) {
1942; GENERIC-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001943; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001944; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1945; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
1946; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1947; GENERIC-NEXT: retq # sched: [1:1.00]
1948;
1949; HASWELL-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001950; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001951; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001952; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001953; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00001954; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001955;
Gadi Haber85d99b42017-10-17 13:45:39 +00001956; BROADWELL-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001957; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00001958; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00001959; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00001960; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00001961; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00001962;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001963; SKYLAKE-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001964; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001965; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00001966; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
1967; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1968; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001969;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001970; SKX-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001971; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001972; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00001973; SKX-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0 # sched: [7:0.50]
1974; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00001975;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001976; ZNVER1-LABEL: test_pbroadcastq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001977; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001978; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50]
1979; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25]
1980; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1981; ZNVER1-NEXT: retq # sched: [1:0.50]
1982 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
1983 %2 = load <2 x i64>, <2 x i64> *%a1, align 16
1984 %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
1985 %4 = add <2 x i64> %1, %3
1986 ret <2 x i64> %4
1987}
1988
; Scheduling-annotation test for vpbroadcastq with a 256-bit (ymm) result.
; The IR splats element 0 of %a0 and element 0 of the <4 x i64> vector loaded
; from %a1 (shufflevector with a zeroinitializer mask), then adds the two
; splats. Most CPUs in the RUN lines are expected to emit a register-source
; broadcast, a memory-source broadcast and a vpaddq. The skx run instead
; expects the memory broadcast folded into the add as a "(%rdi){1to4}"
; operand, so it has no separate memory-source vpbroadcastq line.
1989define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) {
1990; GENERIC-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001991; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001992; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
1993; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
1994; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1995; GENERIC-NEXT: retq # sched: [1:1.00]
1996;
1997; HASWELL-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001998; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00001999; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002000; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002001; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002002; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002003;
Gadi Haber85d99b42017-10-17 13:45:39 +00002004; BROADWELL-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002005; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002006; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002007; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00002008; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002009; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002010;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002011; SKYLAKE-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002012; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002013; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002014; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
2015; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2016; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002017;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002018; SKX-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002019; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002020; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002021; SKX-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0 # sched: [8:0.50]
2022; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002023;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002024; ZNVER1-LABEL: test_pbroadcastq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002025; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002026; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50]
2027; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25]
2028; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2029; ZNVER1-NEXT: retq # sched: [1:0.50]
2030 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
2031 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2032 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer
2033 %4 = add <4 x i64> %1, %3
2034 ret <4 x i64> %4
2035}
2036
; Scheduling-annotation test for vpbroadcastw with a 128-bit (xmm) result.
; The IR splats element 0 of %a0 and element 0 of the <8 x i16> vector loaded
; from %a1 (shufflevector with a zeroinitializer mask), then adds the two
; splats. The expected output for every CPU in the RUN lines is one
; register-source broadcast, one memory-source broadcast and a vpaddw, each
; with its "sched: [latency:throughput]" note; only the order of the two
; broadcasts and the numbers differ between CPUs.
2037define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) {
2038; GENERIC-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002039; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002040; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
2041; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
2042; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2043; GENERIC-NEXT: retq # sched: [1:1.00]
2044;
2045; HASWELL-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002046; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002047; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002048; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002049; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002050; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002051;
Gadi Haber85d99b42017-10-17 13:45:39 +00002052; BROADWELL-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002053; BROADWELL: # %bb.0:
Sanjoy Das81a4a022017-12-12 18:40:58 +00002054; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
Sanjoy Das1074eb22017-12-12 19:11:31 +00002055; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002056; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002057; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002058;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002059; SKYLAKE-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002060; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002061; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002062; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2063; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2064; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002065;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002066; SKX-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002067; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002068; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002069; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2070; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2071; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002072;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002073; ZNVER1-LABEL: test_pbroadcastw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002074; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002075; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00]
2076; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25]
2077; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2078; ZNVER1-NEXT: retq # sched: [1:0.50]
2079 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
2080 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
2081 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
2082 %4 = add <8 x i16> %1, %3
2083 ret <8 x i16> %4
2084}
2085
; Scheduling-annotation test for vpbroadcastw with a 256-bit (ymm) result.
; The IR splats element 0 of %a0 and element 0 of the <16 x i16> vector loaded
; from %a1 (shufflevector with a zeroinitializer mask), then adds the two
; splats. The expected output for every CPU in the RUN lines is one
; register-source broadcast, one memory-source broadcast and a vpaddw, each
; with its "sched: [latency:throughput]" note; only the order of the two
; broadcasts and the numbers differ between CPUs.
2086define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) {
2087; GENERIC-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002088; GENERIC: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002089; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
2090; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
2091; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2092; GENERIC-NEXT: retq # sched: [1:1.00]
2093;
2094; HASWELL-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002095; HASWELL: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002096; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002097; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002098; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002099; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002100;
Gadi Haber85d99b42017-10-17 13:45:39 +00002101; BROADWELL-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002102; BROADWELL: # %bb.0:
Sanjoy Das81a4a022017-12-12 18:40:58 +00002103; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
Sanjoy Das1074eb22017-12-12 19:11:31 +00002104; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002105; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002106; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002107;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002108; SKYLAKE-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002109; SKYLAKE: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002110; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002111; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2112; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2113; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002114;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002115; SKX-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002116; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002117; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002118; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2119; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2120; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002121;
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002122; ZNVER1-LABEL: test_pbroadcastw_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002123; ZNVER1: # %bb.0:
Simon Pilgrimd2d2b372017-09-12 12:59:20 +00002124; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00]
2125; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25]
2126; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2127; ZNVER1-NEXT: retq # sched: [1:0.50]
2128 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
2129 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
2130 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer
2131 %4 = add <16 x i16> %1, %3
2132 ret <16 x i16> %4
2133}
2134
; Scheduling-annotation test for 256-bit vpcmpeqb.
; The IR compares %a0 with %a1 for equality and sign-extends the <32 x i1>
; result back to <32 x i8>. It then compares that mask with the vector loaded
; from %a2 and sign-extends again. Every CPU in the RUN lines is expected to
; emit a register-register vpcmpeqb followed by a vpcmpeqb with a (%rdi)
; memory operand, each with its "sched: [latency:throughput]" note.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002135define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2136; GENERIC-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002137; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002138; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2139; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2140; GENERIC-NEXT: retq # sched: [1:1.00]
2141;
2142; HASWELL-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002143; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002144; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002145; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2146; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002147;
Gadi Haber85d99b42017-10-17 13:45:39 +00002148; BROADWELL-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002149; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002150; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002151; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2152; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002153;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002154; SKYLAKE-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002155; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002156; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2157; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2158; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002159;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002160; SKX-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002161; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002162; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2163; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002164; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002165;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002166; ZNVER1-LABEL: test_pcmpeqb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002167; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002168; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2169; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2170; ZNVER1-NEXT: retq # sched: [1:0.50]
2171 %1 = icmp eq <32 x i8> %a0, %a1
2172 %2 = sext <32 x i1> %1 to <32 x i8>
2173 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
2174 %4 = icmp eq <32 x i8> %2, %3
2175 %5 = sext <32 x i1> %4 to <32 x i8>
2176 ret <32 x i8> %5
2177}
2178
; Scheduling-annotation test for 256-bit vpcmpeqd.
; The IR compares %a0 with %a1 for equality and sign-extends the <8 x i1>
; result back to <8 x i32>. It then compares that mask with the vector loaded
; from %a2 and sign-extends again. Every CPU in the RUN lines is expected to
; emit a register-register vpcmpeqd followed by a vpcmpeqd with a (%rdi)
; memory operand, each with its "sched: [latency:throughput]" note.
2179define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2180; GENERIC-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002181; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002182; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2183; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2184; GENERIC-NEXT: retq # sched: [1:1.00]
2185;
2186; HASWELL-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002187; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002188; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002189; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2190; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002191;
Gadi Haber85d99b42017-10-17 13:45:39 +00002192; BROADWELL-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002193; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002194; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002195; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2196; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002197;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002198; SKYLAKE-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002199; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002200; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2201; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2202; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002203;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002204; SKX-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002205; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002206; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2207; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002208; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002209;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002210; ZNVER1-LABEL: test_pcmpeqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002211; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002212; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2213; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2214; ZNVER1-NEXT: retq # sched: [1:0.50]
2215 %1 = icmp eq <8 x i32> %a0, %a1
2216 %2 = sext <8 x i1> %1 to <8 x i32>
2217 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
2218 %4 = icmp eq <8 x i32> %2, %3
2219 %5 = sext <8 x i1> %4 to <8 x i32>
2220 ret <8 x i32> %5
2221}
2222
; Scheduling-annotation test for 256-bit vpcmpeqq.
; The IR compares %a0 with %a1 for equality and sign-extends the <4 x i1>
; result back to <4 x i64>. It then compares that mask with the vector loaded
; from %a2 and sign-extends again. Every CPU in the RUN lines is expected to
; emit a register-register vpcmpeqq followed by a vpcmpeqq with a (%rdi)
; memory operand, each with its "sched: [latency:throughput]" note.
2223define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2224; GENERIC-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002225; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002226; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2227; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2228; GENERIC-NEXT: retq # sched: [1:1.00]
2229;
2230; HASWELL-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002231; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002232; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002233; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2234; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002235;
Gadi Haber85d99b42017-10-17 13:45:39 +00002236; BROADWELL-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002237; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002238; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002239; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2240; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002241;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002242; SKYLAKE-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002243; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002244; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2245; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2246; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002247;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002248; SKX-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002249; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002250; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2251; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002252; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002253;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002254; ZNVER1-LABEL: test_pcmpeqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002255; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002256; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2257; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2258; ZNVER1-NEXT: retq # sched: [1:0.50]
2259 %1 = icmp eq <4 x i64> %a0, %a1
2260 %2 = sext <4 x i1> %1 to <4 x i64>
2261 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
2262 %4 = icmp eq <4 x i64> %2, %3
2263 %5 = sext <4 x i1> %4 to <4 x i64>
2264 ret <4 x i64> %5
2265}
2266
; test_pcmpeqw: lane-wise equality compare on <16 x i16>. The IR compares %a0
; with %a1, sign-extends the i1 mask back to i16 lanes, then compares that
; mask with a vector loaded from %a2 and sign-extends again. Every per-CPU
; check group below expects two vpcmpeqw instructions (one with a register
; operand, one with a memory operand) followed by retq, each carrying the
; "sched" annotation produced by the -print-schedule RUN lines.
2267define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2268; GENERIC-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002269; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002270; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2271; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2272; GENERIC-NEXT: retq # sched: [1:1.00]
2273;
2274; HASWELL-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002275; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002276; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002277; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2278; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002279;
Gadi Haber85d99b42017-10-17 13:45:39 +00002280; BROADWELL-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002281; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002282; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002283; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2284; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002285;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002286; SKYLAKE-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002287; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002288; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2289; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2290; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002291;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002292; SKX-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002293; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002294; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2295; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002296; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002297;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002298; ZNVER1-LABEL: test_pcmpeqw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002299; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002300; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2301; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2302; ZNVER1-NEXT: retq # sched: [1:0.50]
; First compare: both operands are function arguments.
2303 %1 = icmp eq <16 x i16> %a0, %a1
2304 %2 = sext <16 x i1> %1 to <16 x i16>
; Second compare: the right-hand operand is loaded from %a2 and the left-hand
; operand is the sign-extended result of the first compare.
2305 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2306 %4 = icmp eq <16 x i16> %2, %3
2307 %5 = sext <16 x i1> %4 to <16 x i16>
2308 ret <16 x i16> %5
2309}
2310
; test_pcmpgtb: lane-wise signed greater-than compare on <32 x i8>. The IR
; compares %a0 with %a1 (icmp sgt), sign-extends the i1 mask back to i8
; lanes, then compares that mask with a vector loaded from %a2 and
; sign-extends again. Every per-CPU check group below expects two vpcmpgtb
; instructions (one with a register operand, one with a memory operand)
; followed by retq, each carrying the "sched" annotation produced by the
; -print-schedule RUN lines.
2311define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2312; GENERIC-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002313; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002314; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2315; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2316; GENERIC-NEXT: retq # sched: [1:1.00]
2317;
2318; HASWELL-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002319; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002320; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002321; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2322; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002323;
Gadi Haber85d99b42017-10-17 13:45:39 +00002324; BROADWELL-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002325; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002326; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002327; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2328; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002329;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002330; SKYLAKE-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002331; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002332; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2333; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2334; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002335;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002336; SKX-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002337; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002338; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2339; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002340; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002341;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002342; ZNVER1-LABEL: test_pcmpgtb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002343; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002344; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2345; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2346; ZNVER1-NEXT: retq # sched: [1:0.50]
; First compare: both operands are function arguments.
2347 %1 = icmp sgt <32 x i8> %a0, %a1
2348 %2 = sext <32 x i1> %1 to <32 x i8>
; Second compare: the right-hand operand is loaded from %a2 and the left-hand
; operand is the sign-extended result of the first compare.
2349 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
2350 %4 = icmp sgt <32 x i8> %2, %3
2351 %5 = sext <32 x i1> %4 to <32 x i8>
2352 ret <32 x i8> %5
2353}
2354
; test_pcmpgtd: lane-wise signed greater-than compare on <8 x i32>. The IR
; compares %a0 with %a1 (icmp sgt), sign-extends the i1 mask back to i32
; lanes, then compares that mask with a vector loaded from %a2 and
; sign-extends again. Every per-CPU check group below expects two vpcmpgtd
; instructions (one with a register operand, one with a memory operand)
; followed by retq, each carrying the "sched" annotation produced by the
; -print-schedule RUN lines.
2355define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2356; GENERIC-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002357; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002358; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2359; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2360; GENERIC-NEXT: retq # sched: [1:1.00]
2361;
2362; HASWELL-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002363; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002364; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002365; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2366; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002367;
Gadi Haber85d99b42017-10-17 13:45:39 +00002368; BROADWELL-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002369; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002370; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002371; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2372; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002373;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002374; SKYLAKE-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002375; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002376; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2377; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2378; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002379;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002380; SKX-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002381; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002382; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2383; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002384; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002385;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002386; ZNVER1-LABEL: test_pcmpgtd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002387; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002388; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2389; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2390; ZNVER1-NEXT: retq # sched: [1:0.50]
; First compare: both operands are function arguments.
2391 %1 = icmp sgt <8 x i32> %a0, %a1
2392 %2 = sext <8 x i1> %1 to <8 x i32>
; Second compare: the right-hand operand is loaded from %a2 and the left-hand
; operand is the sign-extended result of the first compare.
2393 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
2394 %4 = icmp sgt <8 x i32> %2, %3
2395 %5 = sext <8 x i1> %4 to <8 x i32>
2396 ret <8 x i32> %5
2397}
2398
; test_pcmpgtq: lane-wise signed greater-than compare on <4 x i64>. The IR
; compares %a0 with %a1 (icmp sgt), sign-extends the i1 mask back to i64
; lanes, then compares that mask with a vector loaded from %a2 and
; sign-extends again. Every per-CPU check group below expects two vpcmpgtq
; instructions (one with a register operand, one with a memory operand)
; followed by retq, each carrying the "sched" annotation produced by the
; -print-schedule RUN lines.
2399define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2400; GENERIC-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002401; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002402; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2403; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2404; GENERIC-NEXT: retq # sched: [1:1.00]
2405;
2406; HASWELL-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002407; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002408; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002409; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
2410; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002411;
Gadi Haber85d99b42017-10-17 13:45:39 +00002412; BROADWELL-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002413; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002414; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002415; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
2416; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002417;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002418; SKYLAKE-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002419; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002420; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002421; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2422; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002423;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002424; SKX-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002425; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002426; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2427; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002428; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002429;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002430; ZNVER1-LABEL: test_pcmpgtq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002431; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002432; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2433; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
2434; ZNVER1-NEXT: retq # sched: [1:0.50]
; First compare: both operands are function arguments.
2435 %1 = icmp sgt <4 x i64> %a0, %a1
2436 %2 = sext <4 x i1> %1 to <4 x i64>
; Second compare: the right-hand operand is loaded from %a2 and the left-hand
; operand is the sign-extended result of the first compare.
2437 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
2438 %4 = icmp sgt <4 x i64> %2, %3
2439 %5 = sext <4 x i1> %4 to <4 x i64>
2440 ret <4 x i64> %5
2441}
2442
; test_pcmpgtw: lane-wise signed greater-than compare on <16 x i16>. The IR
; compares %a0 with %a1 (icmp sgt), sign-extends the i1 mask back to i16
; lanes, then compares that mask with a vector loaded from %a2 and
; sign-extends again. Every per-CPU check group below expects two vpcmpgtw
; instructions (one with a register operand, one with a memory operand)
; followed by retq, each carrying the "sched" annotation produced by the
; -print-schedule RUN lines.
2443define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2444; GENERIC-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002445; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002446; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2447; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2448; GENERIC-NEXT: retq # sched: [1:1.00]
2449;
2450; HASWELL-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002451; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002452; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002453; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2454; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002455;
Gadi Haber85d99b42017-10-17 13:45:39 +00002456; BROADWELL-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002457; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002458; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002459; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2460; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002461;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002462; SKYLAKE-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002463; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002464; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2465; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2466; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002467;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002468; SKX-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002469; SKX: # %bb.0:
Craig Topperc4d2dd82018-01-09 18:14:22 +00002470; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2471; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Gadi Haber684944b2017-10-08 12:52:54 +00002472; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002473;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002474; ZNVER1-LABEL: test_pcmpgtw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002475; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00002476; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2477; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2478; ZNVER1-NEXT: retq # sched: [1:0.50]
; First compare: both operands are function arguments.
2479 %1 = icmp sgt <16 x i16> %a0, %a1
2480 %2 = sext <16 x i1> %1 to <16 x i16>
; Second compare: the right-hand operand is loaded from %a2 and the left-hand
; operand is the sign-extended result of the first compare.
2481 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2482 %4 = icmp sgt <16 x i16> %2, %3
2483 %5 = sext <16 x i1> %4 to <16 x i16>
2484 ret <16 x i16> %5
2485}
2486
; test_perm2i128: two-source shuffle on <4 x i64> with mask <2,3,4,5>, i.e.
; elements 2-3 of the first source followed by elements 0-1 of the second.
; It is applied once with %a1 as the second source and once with a vector
; loaded from %a2, and the two results are added. Every per-CPU check group
; below expects two vperm2i128 instructions (one whose second source is ymm1,
; one whose second source is mem), then vpaddq and retq, each carrying the
; "sched" annotation produced by the -print-schedule RUN lines.
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002487define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2488; GENERIC-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002489; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002490; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
2491; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00]
2492; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2493; GENERIC-NEXT: retq # sched: [1:1.00]
2494;
2495; HASWELL-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002496; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002497; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002498; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002499; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002500; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002501;
Gadi Haber85d99b42017-10-17 13:45:39 +00002502; BROADWELL-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002503; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002504; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002505; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002506; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002507; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002508;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002509; SKYLAKE-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002510; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002511; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002512; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2513; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2514; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002515;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002516; SKX-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002517; SKX: # %bb.0:
Craig Toppere5d44ce2017-11-04 18:10:03 +00002518; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2519; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002520; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2521; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002522;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002523; ZNVER1-LABEL: test_perm2i128:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002524; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002525; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25]
2526; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50]
2527; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2528; ZNVER1-NEXT: retq # sched: [1:0.50]
; Shuffle with both sources coming from function arguments.
2529 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; Same shuffle mask, but the second source is loaded from %a2.
2530 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
2531 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; Adding the two shuffles makes both results feed the returned value.
2532 %4 = add <4 x i64> %1, %3
2533 ret <4 x i64> %4
2534}
2535
; test_permd: calls the @llvm.x86.avx2.permd intrinsic twice with %a0 as the
; second argument: once with %a1 as the first argument and once with a
; vector loaded from %a2. The two results are added. Every per-CPU check
; group below expects two vpermd instructions (one with a register operand,
; one with a memory operand), then vpaddd and retq, each carrying the "sched"
; annotation produced by the -print-schedule RUN lines.
2536define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2537; GENERIC-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002538; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002539; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2540; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2541; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2542; GENERIC-NEXT: retq # sched: [1:1.00]
2543;
2544; HASWELL-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002545; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002546; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002547; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002548; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002549; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002550;
Gadi Haber85d99b42017-10-17 13:45:39 +00002551; BROADWELL-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002552; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002553; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002554; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002555; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002556; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002557;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002558; SKYLAKE-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002559; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002560; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002561; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2562; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2563; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002564;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002565; SKX-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002566; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002567; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002568; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2569; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2570; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002571;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002572; ZNVER1-LABEL: test_permd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002573; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002574; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25]
2575; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
2576; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2577; ZNVER1-NEXT: retq # sched: [1:0.50]
; Intrinsic call whose first argument is a function argument.
2578 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
; Same intrinsic, but the first argument is loaded from %a2.
2579 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
2580 %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0)
; Adding the two results makes both calls feed the returned value.
2581 %4 = add <8 x i32> %1, %3
2582 ret <8 x i32> %4
2583}
; Declaration of the intrinsic called by test_permd.
2584declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
2585
; test_permpd: single-source shuffles on <4 x double>. %a0 is shuffled with
; mask <3,2,2,3>, a vector loaded from %a1 is shuffled with mask <0,2,2,3>,
; and the two results are combined with fadd. Every per-CPU check group below
; expects two vpermpd instructions (one reading ymm0, one reading mem), then
; vaddpd and retq, each carrying the "sched" annotation produced by the
; -print-schedule RUN lines. The ZNVER1 group lists the memory-form vpermpd
; first; the other groups list the register form first.
; NOTE(review): the ZNVER1 expectations use 100 and 107 as the first "sched"
; number for vpermpd, far larger than any other CPU's value here. This looks
; like a placeholder in that scheduler model. Confirm before relying on it.
2586define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
2587; GENERIC-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002588; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002589; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2590; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2591; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2592; GENERIC-NEXT: retq # sched: [1:1.00]
2593;
2594; HASWELL-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002595; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002596; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002597; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002598; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002599; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002600;
Gadi Haber85d99b42017-10-17 13:45:39 +00002601; BROADWELL-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002602; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002603; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002604; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002605; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002606; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002607;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002608; SKYLAKE-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002609; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002610; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002611; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002612; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002613; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002614;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002615; SKX-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002616; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002617; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002618; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2619; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2620; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002621;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002622; ZNVER1-LABEL: test_permpd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002623; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002624; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50]
2625; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25]
2626; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2627; ZNVER1-NEXT: retq # sched: [1:0.50]
; Shuffle of the register argument.
2628 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
; Shuffle of a vector loaded from %a1, using a different mask.
2629 %2 = load <4 x double>, <4 x double> *%a1, align 32
2630 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
; Adding the two shuffles makes both results feed the returned value.
2631 %4 = fadd <4 x double> %1, %3
2632 ret <4 x double> %4
2633}
2634
; test_permps: calls the @llvm.x86.avx2.permps intrinsic twice with the
; <8 x i32> argument %a0 as the second argument: once with %a1 as the first
; argument and once with a vector loaded from %a2. The two results are
; combined with fadd. Every per-CPU check group below expects two vpermps
; instructions (one with a register operand, one with a memory operand), then
; vaddps and retq, each carrying the "sched" annotation produced by the
; -print-schedule RUN lines.
; NOTE(review): the ZNVER1 expectations use 100 and 107 as the first "sched"
; number for vpermps, far larger than any other CPU's value here. This looks
; like a placeholder in that scheduler model. Confirm before relying on it.
2635define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) {
2636; GENERIC-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002637; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002638; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2639; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2640; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2641; GENERIC-NEXT: retq # sched: [1:1.00]
2642;
2643; HASWELL-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002644; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002645; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002646; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002647; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002648; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002649;
Gadi Haber85d99b42017-10-17 13:45:39 +00002650; BROADWELL-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002651; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002652; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002653; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002654; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002655; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002656;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002657; SKYLAKE-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002658; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002659; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002660; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
Gadi Haber6f8fbf42017-09-19 06:19:27 +00002661; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002662; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002663;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002664; SKX-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002665; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002666; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002667; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2668; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
2669; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002670;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002671; ZNVER1-LABEL: test_permps:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002672; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002673; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25]
2674; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50]
2675; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2676; ZNVER1-NEXT: retq # sched: [1:0.50]
; Intrinsic call whose first argument is a function argument.
2677 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
; Same intrinsic, but the first argument is loaded from %a2.
2678 %2 = load <8 x float>, <8 x float> *%a2, align 32
2679 %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0)
; Adding the two results makes both calls feed the returned value.
2680 %4 = fadd <8 x float> %1, %3
2681 ret <8 x float> %4
2682}
; Declaration of the intrinsic called by test_permps.
2683declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
2684
; test_permq: single-source shuffles on <4 x i64>. %a0 is shuffled with mask
; <3,2,2,3>, a vector loaded from %a1 is shuffled with mask <0,2,2,3>, and
; the two results are added. Every per-CPU check group below expects two
; vpermq instructions (one reading ymm0, one reading mem), then vpaddq and
; retq, each carrying the "sched" annotation produced by the -print-schedule
; RUN lines. The ZNVER1 group lists the memory-form vpermq first; the other
; groups list the register form first.
2685define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
2686; GENERIC-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002687; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002688; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2689; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2690; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2691; GENERIC-NEXT: retq # sched: [1:1.00]
2692;
2693; HASWELL-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002694; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002695; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002696; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002697; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002698; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002699;
Gadi Haber85d99b42017-10-17 13:45:39 +00002700; BROADWELL-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002701; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00002702; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002703; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002704; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002705; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002706;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002707; SKYLAKE-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002708; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002709; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002710; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2711; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2712; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002713;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002714; SKX-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002715; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002716; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002717; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2718; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2719; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002720;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002721; ZNVER1-LABEL: test_permq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002722; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00002723; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50]
2724; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25]
2725; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2726; ZNVER1-NEXT: retq # sched: [1:0.50]
; Shuffle of the register argument.
2727 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
; Shuffle of a vector loaded from %a1, using a different mask.
2728 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2729 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
; Adding the two shuffles makes both results feed the returned value.
2730 %4 = add <4 x i64> %1, %3
2731 ret <4 x i64> %4
2732}
2733
; Schedule test for the xmm form of vpgatherdd: calls @llvm.x86.avx2.gather.d.d
; once and returns its result. The trailing i8 2 argument shows up as the scale
; in the expected addressing mode (%rdi,%xmm1,2).
; The per-CPU assertions inside the function are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): the znver1 expectation is "sched: [100:?]", which looks like a
; placeholder for missing scheduling data -- confirm against the znver1 model.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002734define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
2735; GENERIC-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002736; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002737; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002738; GENERIC-NEXT: retq # sched: [1:1.00]
2739;
2740; HASWELL-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002741; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002742; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
2743; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002744;
Gadi Haber85d99b42017-10-17 13:45:39 +00002745; BROADWELL-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002746; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002747; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002748; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002749;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002750; SKYLAKE-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002751; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002752; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2753; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002754;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002755; SKX-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002756; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002757; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2758; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002759;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002760; ZNVER1-LABEL: test_pgatherdd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002761; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002762; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2763; ZNVER1-NEXT: retq # sched: [1:0.50]
2764 %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2)
2765 ret <4 x i32> %1
2766}
; Declaration of the gather intrinsic called by test_pgatherdd.
2767declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly
2768
; Schedule test for the ymm form of vpgatherdd: calls
; @llvm.x86.avx2.gather.d.d.256 once and returns its result. The trailing i8 2
; argument shows up as the scale in the expected addressing mode
; (%rdi,%ymm1,2).
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): the znver1 expectation is "sched: [100:?]", which looks like a
; placeholder for missing scheduling data -- confirm against the znver1 model.
2769define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
2770; GENERIC-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002771; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002772; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002773; GENERIC-NEXT: retq # sched: [1:1.00]
2774;
2775; HASWELL-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002776; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002777; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [27:6.50]
2778; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002779;
Gadi Haber85d99b42017-10-17 13:45:39 +00002780; BROADWELL-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002781; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002782; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002783; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002784;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002785; SKYLAKE-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002786; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002787; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2788; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002789;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002790; SKX-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002791; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002792; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2793; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002794;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002795; ZNVER1-LABEL: test_pgatherdd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002796; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002797; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
2798; ZNVER1-NEXT: retq # sched: [1:0.50]
2799 %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2)
2800 ret <8 x i32> %1
2801}
; Declaration of the gather intrinsic called by test_pgatherdd_ymm.
2802declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly
2803
; Schedule test for the xmm form of vpgatherdq: calls @llvm.x86.avx2.gather.d.q
; once and returns its result. The index argument %a2 is <4 x i32> while the
; data operands are <2 x i64>. The trailing i8 2 argument shows up as the scale
; in the expected addressing mode (%rdi,%xmm1,2).
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): the znver1 expectation is "sched: [100:?]", which looks like a
; placeholder for missing scheduling data -- confirm against the znver1 model.
2804define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
2805; GENERIC-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002806; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002807; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002808; GENERIC-NEXT: retq # sched: [1:1.00]
2809;
2810; HASWELL-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002811; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002812; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
2813; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002814;
Gadi Haber85d99b42017-10-17 13:45:39 +00002815; BROADWELL-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002816; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002817; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002818; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002819;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002820; SKYLAKE-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002821; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002822; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2823; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002824;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002825; SKX-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002826; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002827; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2828; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002829;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002830; ZNVER1-LABEL: test_pgatherdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002831; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002832; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2833; ZNVER1-NEXT: retq # sched: [1:0.50]
2834 %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2)
2835 ret <2 x i64> %1
2836}
; Declaration of the gather intrinsic called by test_pgatherdq.
2837declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly
2838
; Schedule test for the ymm form of vpgatherdq: calls
; @llvm.x86.avx2.gather.d.q.256 once and returns its result. The index argument
; %a2 is <4 x i32>, so the expected instruction uses an xmm index register with
; ymm data registers: (%rdi,%xmm1,2) with %ymm2 / %ymm0. The trailing i8 2
; argument shows up as the scale in that addressing mode.
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): the znver1 expectation is "sched: [100:?]", which looks like a
; placeholder for missing scheduling data -- confirm against the znver1 model.
2839define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
2840; GENERIC-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002841; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002842; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002843; GENERIC-NEXT: retq # sched: [1:1.00]
2844;
2845; HASWELL-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002846; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002847; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [27:4.00]
2848; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002849;
Gadi Haber85d99b42017-10-17 13:45:39 +00002850; BROADWELL-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002851; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002852; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002853; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002854;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002855; SKYLAKE-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002856; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002857; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2858; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002859;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002860; SKX-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002861; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002862; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2863; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002864;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002865; ZNVER1-LABEL: test_pgatherdq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002866; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002867; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:?]
2868; ZNVER1-NEXT: retq # sched: [1:0.50]
2869 %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2)
2870 ret <4 x i64> %1
2871}
; Declaration of the gather intrinsic called by test_pgatherdq_ymm.
2872declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly
2873
; Schedule test for the xmm form of vpgatherqd: calls @llvm.x86.avx2.gather.q.d
; once and returns its result. The index argument %a2 is <2 x i64> while the
; data operands are <4 x i32>. The trailing i8 2 argument shows up as the scale
; in the expected addressing mode (%rdi,%xmm1,2).
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): the znver1 expectation is "sched: [100:?]", which looks like a
; placeholder for missing scheduling data -- confirm against the znver1 model.
2874define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
2875; GENERIC-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002876; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002877; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002878; GENERIC-NEXT: retq # sched: [1:1.00]
2879;
2880; HASWELL-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002881; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002882; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:5.00]
2883; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002884;
Gadi Haber85d99b42017-10-17 13:45:39 +00002885; BROADWELL-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002886; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002887; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002888; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002889;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002890; SKYLAKE-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002891; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002892; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2893; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002894;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002895; SKX-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002896; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002897; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2898; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002899;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002900; ZNVER1-LABEL: test_pgatherqd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002901; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002902; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2903; ZNVER1-NEXT: retq # sched: [1:0.50]
2904 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2)
2905 ret <4 x i32> %1
2906}
; Declaration of the gather intrinsic called by test_pgatherqd.
2907declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly
2908
; Schedule test for the ymm-index form of vpgatherqd: calls
; @llvm.x86.avx2.gather.q.d.256 once and returns its result. The index argument
; %a2 is <4 x i64>, so the expected instruction uses a ymm index register with
; xmm data registers: (%rdi,%ymm1,2) with %xmm2 / %xmm0. The trailing i8 2
; argument shows up as the scale in that addressing mode.
; Unlike the other gather tests in this file section, every CPU's expected
; output here has a vzeroupper between the gather and the retq.
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): the znver1 expectations for both the gather and vzeroupper are
; "sched: [100:?]", which looks like a placeholder for missing scheduling data
; -- confirm against the znver1 model.
2909define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
2910; GENERIC-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002911; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002912; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim4ff43d82017-12-10 13:41:29 +00002913; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002914; GENERIC-NEXT: retq # sched: [1:1.00]
2915;
2916; HASWELL-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002917; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002918; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [28:5.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002919; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00002920; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002921;
Gadi Haber85d99b42017-10-17 13:45:39 +00002922; BROADWELL-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002923; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002924; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00002925; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00002926; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002927;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002928; SKYLAKE-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002929; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002930; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002931; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002932; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002933;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002934; SKX-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002935; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002936; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002937; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00002938; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002939;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002940; ZNVER1-LABEL: test_pgatherqd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002941; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002942; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:?]
2943; ZNVER1-NEXT: vzeroupper # sched: [100:?]
2944; ZNVER1-NEXT: retq # sched: [1:0.50]
2945 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2)
2946 ret <4 x i32> %1
2947}
; Declaration of the gather intrinsic called by test_pgatherqd_ymm.
2948declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly
2949
; Schedule test for the xmm form of vpgatherqq: calls @llvm.x86.avx2.gather.q.q
; once and returns its result. The trailing i8 2 argument shows up as the scale
; in the expected addressing mode (%rdi,%xmm1,2).
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): the znver1 expectation is "sched: [100:?]", which looks like a
; placeholder for missing scheduling data -- confirm against the znver1 model.
2950define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
2951; GENERIC-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002952; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002953; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002954; GENERIC-NEXT: retq # sched: [1:1.00]
2955;
2956; HASWELL-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002957; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002958; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33]
2959; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002960;
Gadi Haber85d99b42017-10-17 13:45:39 +00002961; BROADWELL-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002962; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002963; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002964; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00002965;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002966; SKYLAKE-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002967; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00002968; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2969; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002970;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002971; SKX-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002972; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00002973; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2974; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00002975;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002976; ZNVER1-LABEL: test_pgatherqq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002977; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002978; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2979; ZNVER1-NEXT: retq # sched: [1:0.50]
2980 %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
2981 ret <2 x i64> %1
2982}
; Declaration of the gather intrinsic called by test_pgatherqq.
2983declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly
2984
; Schedule test for the ymm form of vpgatherqq: calls
; @llvm.x86.avx2.gather.q.q.256 once and returns its result. The trailing i8 2
; argument shows up as the scale in the expected addressing mode
; (%rdi,%ymm1,2).
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): the znver1 expectation is "sched: [100:?]", which looks like a
; placeholder for missing scheduling data -- confirm against the znver1 model.
2985define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
2986; GENERIC-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002987; GENERIC: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002988; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002989; GENERIC-NEXT: retq # sched: [1:1.00]
2990;
2991; HASWELL-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002992; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00002993; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [24:5.00]
2994; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00002995;
Gadi Haber85d99b42017-10-17 13:45:39 +00002996; BROADWELL-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00002997; BROADWELL: # %bb.0:
Simon Pilgrimb69dae42017-12-05 20:47:11 +00002998; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00002999; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003000;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003001; SKYLAKE-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003002; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003003; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3004; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003005;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003006; SKX-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003007; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003008; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3009; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003010;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003011; ZNVER1-LABEL: test_pgatherqq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003012; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003013; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
3014; ZNVER1-NEXT: retq # sched: [1:0.50]
3015 %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
3016 ret <4 x i64> %1
3017}
; Declaration of the gather intrinsic called by test_pgatherqq_ymm.
3018declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly
3019
; Schedule test for the ymm vphaddd: calls @llvm.x86.avx2.phadd.d twice so both
; operand forms are covered. The first call uses %a0 and %a1 (expected as the
; register-register form); the second feeds the first result plus a value
; loaded from %a2 (align 32), which the expected output shows folded into the
; memory-operand form "vphaddd (%rdi), ...".
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): both znver1 expectations are "sched: [100:?]", which looks like
; a placeholder for missing scheduling data -- confirm against the znver1 model.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003020define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3021; GENERIC-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003022; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003023; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3024; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3025; GENERIC-NEXT: retq # sched: [1:1.00]
3026;
3027; HASWELL-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003028; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003029; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003030; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3031; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003032;
Gadi Haber85d99b42017-10-17 13:45:39 +00003033; BROADWELL-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003034; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003035; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003036; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3037; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003038;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003039; SKYLAKE-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003040; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003041; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003042; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3043; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003044;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003045; SKX-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003046; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003047; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003048; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3049; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003050;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003051; ZNVER1-LABEL: test_phaddd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003052; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003053; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:?]
3054; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:?]
3055; ZNVER1-NEXT: retq # sched: [1:0.50]
3056 %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
3057 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3058 %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2)
3059 ret <8 x i32> %3
3060}
; Declaration of the intrinsic called by test_phaddd.
3061declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
3062
; Schedule test for the ymm vphaddsw: calls @llvm.x86.avx2.phadd.sw twice so
; both operand forms are covered. The first call uses %a0 and %a1 (expected as
; the register-register form); the second feeds the first result plus a value
; loaded from %a2 (align 32), which the expected output shows folded into the
; memory-operand form "vphaddsw (%rdi), ...".
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): both znver1 expectations are "sched: [100:?]", which looks like
; a placeholder for missing scheduling data -- confirm against the znver1 model.
3063define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3064; GENERIC-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003065; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003066; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3067; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3068; GENERIC-NEXT: retq # sched: [1:1.00]
3069;
3070; HASWELL-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003071; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003072; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003073; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3074; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003075;
Gadi Haber85d99b42017-10-17 13:45:39 +00003076; BROADWELL-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003077; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003078; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003079; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3080; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003081;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003082; SKYLAKE-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003083; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003084; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003085; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3086; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003087;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003088; SKX-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003089; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003090; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003091; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3092; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003093;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003094; ZNVER1-LABEL: test_phaddsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003095; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003096; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3097; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3098; ZNVER1-NEXT: retq # sched: [1:0.50]
3099 %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
3100 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3101 %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2)
3102 ret <16 x i16> %3
3103}
; Declaration of the intrinsic called by test_phaddsw.
3104declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
3105
; Schedule test for the ymm vphaddw: calls @llvm.x86.avx2.phadd.w twice so both
; operand forms are covered. The first call uses %a0 and %a1 (expected as the
; register-register form); the second feeds the first result plus a value
; loaded from %a2 (align 32), which the expected output shows folded into the
; memory-operand form "vphaddw (%rdi), ...".
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): both znver1 expectations are "sched: [100:?]", which looks like
; a placeholder for missing scheduling data -- confirm against the znver1 model.
3106define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3107; GENERIC-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003108; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003109; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3110; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3111; GENERIC-NEXT: retq # sched: [1:1.00]
3112;
3113; HASWELL-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003114; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003115; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003116; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3117; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003118;
Gadi Haber85d99b42017-10-17 13:45:39 +00003119; BROADWELL-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003120; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003121; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003122; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3123; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003124;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003125; SKYLAKE-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003126; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003127; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003128; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3129; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003130;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003131; SKX-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003132; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003133; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003134; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3135; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003136;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003137; ZNVER1-LABEL: test_phaddw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003138; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003139; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3140; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3141; ZNVER1-NEXT: retq # sched: [1:0.50]
3142 %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
3143 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3144 %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2)
3145 ret <16 x i16> %3
3146}
; Declaration of the intrinsic called by test_phaddw.
3147declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
3148
; Schedule test for the ymm vphsubd: calls @llvm.x86.avx2.phsub.d twice so both
; operand forms are covered. The first call uses %a0 and %a1 (expected as the
; register-register form); the second feeds the first result plus a value
; loaded from %a2 (align 32), which the expected output shows folded into the
; memory-operand form "vphsubd (%rdi), ...".
; The per-CPU assertions inside the function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): both znver1 expectations are "sched: [100:?]", which looks like
; a placeholder for missing scheduling data -- confirm against the znver1 model.
3149define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3150; GENERIC-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003151; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003152; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3153; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3154; GENERIC-NEXT: retq # sched: [1:1.00]
3155;
3156; HASWELL-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003157; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003158; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003159; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3160; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003161;
Gadi Haber85d99b42017-10-17 13:45:39 +00003162; BROADWELL-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003163; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003164; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003165; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3166; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003167;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003168; SKYLAKE-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003169; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003170; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003171; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3172; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003173;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003174; SKX-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003175; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003176; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003177; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3178; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003179;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003180; ZNVER1-LABEL: test_phsubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003181; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003182; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:?]
3183; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:?]
3184; ZNVER1-NEXT: retq # sched: [1:0.50]
3185 %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
3186 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3187 %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2)
3188 ret <8 x i32> %3
3189}
; Declaration of the intrinsic called by test_phsubd.
3190declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
3191
; test_phsubsw: scheduling-annotation test for the llvm.x86.avx2.phsub.sw
; intrinsic. The intrinsic is called once on the two vector arguments and once
; on that result plus a vector loaded from %a2 (align 32), so the expected
; assembly has a vphsubsw with two register sources followed by a vphsubsw
; whose first operand is (%rdi).
; The per-CPU assertion lines are autogenerated (see the NOTE on the first line
; of this file) and record the "# sched: [..]" text llc prints under
; -print-schedule; regenerate them instead of editing them by hand.
; NOTE(review): the znver1 expectations show "[100:?]" for both vphsubsw forms,
; which looks like a placeholder for a missing scheduling entry -- confirm.
3192define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3193; GENERIC-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003194; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003195; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3196; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3197; GENERIC-NEXT: retq # sched: [1:1.00]
3198;
3199; HASWELL-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003200; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003201; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003202; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3203; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003204;
Gadi Haber85d99b42017-10-17 13:45:39 +00003205; BROADWELL-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003206; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003207; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003208; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3209; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003210;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003211; SKYLAKE-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003212; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003213; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003214; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3215; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003216;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003217; SKX-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003218; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003219; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003220; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3221; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003222;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003223; ZNVER1-LABEL: test_phsubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003224; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003225; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3226; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3227; ZNVER1-NEXT: retq # sched: [1:0.50]
3228 %1 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
3229 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3230 %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2)
3231 ret <16 x i16> %3
3232}
3233declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
3234
; test_phsubw: scheduling-annotation test for the llvm.x86.avx2.phsub.w
; intrinsic. The intrinsic is called once on the two vector arguments and once
; on that result plus a vector loaded from %a2 (align 32), so the expected
; assembly has a vphsubw with two register sources followed by a vphsubw whose
; first operand is (%rdi).
; The per-CPU assertion lines are autogenerated (see the NOTE on the first line
; of this file) and record the "# sched: [..]" text llc prints under
; -print-schedule; regenerate them instead of editing them by hand.
; NOTE(review): the znver1 expectations show "[100:?]" for both vphsubw forms,
; which looks like a placeholder for a missing scheduling entry -- confirm.
3235define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3236; GENERIC-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003237; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003238; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3239; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3240; GENERIC-NEXT: retq # sched: [1:1.00]
3241;
3242; HASWELL-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003243; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003244; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003245; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3246; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003247;
Gadi Haber85d99b42017-10-17 13:45:39 +00003248; BROADWELL-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003249; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003250; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003251; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3252; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003253;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003254; SKYLAKE-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003255; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003256; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003257; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3258; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003259;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003260; SKX-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003261; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003262; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber684944b2017-10-08 12:52:54 +00003263; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3264; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003265;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003266; ZNVER1-LABEL: test_phsubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003267; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003268; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3269; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3270; ZNVER1-NEXT: retq # sched: [1:0.50]
3271 %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
3272 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3273 %3 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2)
3274 ret <16 x i16> %3
3275}
3276declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
3277
; test_pmaddubsw: scheduling-annotation test for the llvm.x86.avx2.pmadd.ub.sw
; intrinsic. The intrinsic is called once on the two <32 x i8> arguments and
; once more with a vector loaded from %a2 (align 32) as its second operand, so
; the expected assembly has a vpmaddubsw with two register sources followed by
; a vpmaddubsw whose first operand is (%rdi).
; The intrinsic returns <16 x i16> but takes <32 x i8> operands, hence the
; bitcast of the first result before it is fed to the second call.
; The per-CPU assertion lines are autogenerated (see the NOTE on the first line
; of this file) and record the "# sched: [..]" text llc prints under
; -print-schedule; regenerate them instead of editing them by hand.
3278define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3279; GENERIC-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003280; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003281; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3282; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
3283; GENERIC-NEXT: retq # sched: [1:1.00]
3284;
3285; HASWELL-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003286; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003287; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003288; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3289; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003290;
Gadi Haber85d99b42017-10-17 13:45:39 +00003291; BROADWELL-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003292; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003293; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003294; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3295; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003296;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003297; SKYLAKE-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003298; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003299; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003300; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3301; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003302;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003303; SKX-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003304; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003305; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003306; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3307; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003308;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003309; ZNVER1-LABEL: test_pmaddubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003310; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003311; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3312; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3313; ZNVER1-NEXT: retq # sched: [1:0.50]
3314 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
3315 %2 = bitcast <16 x i16> %1 to <32 x i8>
3316 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
3317 %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x i8> %3)
3318 ret <16 x i16> %4
3319}
3320declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
3321
; test_pmaddwd: scheduling-annotation test for the llvm.x86.avx2.pmadd.wd
; intrinsic. The intrinsic is called once on the two <16 x i16> arguments and
; once more with a vector loaded from %a2 (align 32) as its second operand, so
; the expected assembly has a vpmaddwd with two register sources followed by a
; vpmaddwd whose first operand is (%rdi).
; The intrinsic returns <8 x i32> but takes <16 x i16> operands, hence the
; bitcast of the first result before it is fed to the second call.
; The per-CPU assertion lines are autogenerated (see the NOTE on the first line
; of this file) and record the "# sched: [..]" text llc prints under
; -print-schedule; regenerate them instead of editing them by hand.
3322define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3323; GENERIC-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003324; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003325; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3326; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
3327; GENERIC-NEXT: retq # sched: [1:1.00]
3328;
3329; HASWELL-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003330; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003331; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003332; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3333; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003334;
Gadi Haber85d99b42017-10-17 13:45:39 +00003335; BROADWELL-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003336; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003337; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00003338; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3339; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003340;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003341; SKYLAKE-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003342; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00003343; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003344; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3345; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003346;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003347; SKX-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003348; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003349; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003350; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3351; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003352;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003353; ZNVER1-LABEL: test_pmaddwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003354; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003355; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3356; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3357; ZNVER1-NEXT: retq # sched: [1:0.50]
3358 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
3359 %2 = bitcast <8 x i32> %1 to <16 x i16>
3360 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
3361 %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3)
3362 ret <8 x i32> %4
3363}
3364declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
3365
; test_pmaskmovd: scheduling-annotation test for the 128-bit
; llvm.x86.avx2.maskload.d / llvm.x86.avx2.maskstore.d intrinsics.
; maskload.d is called with (%a0, %a1) and maskstore.d with (%a0, %a1, %a2) on
; the same pointer; the loaded vector is the return value.
; NOTE(review): %a1 is presumably the mask operand of both intrinsics --
; confirm against the intrinsic definitions.
; Expected assembly: a vpmaskmovd whose first operand is (%rdi), a vpmaskmovd
; whose last operand is (%rdi), then a vmovdqa from %xmm2 to %xmm0 and retq.
; The per-CPU assertion lines are autogenerated (see the NOTE on the first line
; of this file) and record the "# sched: [..]" text llc prints under
; -print-schedule; regenerate them instead of editing them by hand.
; NOTE(review): the znver1 expectations show "[100:?]" for both vpmaskmovd
; forms, which looks like a placeholder for a missing scheduling entry --
; confirm.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003366define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
3367; GENERIC-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003368; GENERIC: # %bb.0:
Simon Pilgrim809c0242017-12-06 18:24:48 +00003369; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [4:0.50]
3370; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003371; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
3372; GENERIC-NEXT: retq # sched: [1:1.00]
3373;
3374; HASWELL-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003375; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00003376; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
3377; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003378; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003379; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003380;
Gadi Haber85d99b42017-10-17 13:45:39 +00003381; BROADWELL-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003382; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003383; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3384; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003385; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00003386; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003387;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003388; SKYLAKE-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003389; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003390; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3391; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003392; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003393; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003394;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003395; SKX-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003396; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003397; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3398; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003399; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003400; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003401;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003402; ZNVER1-LABEL: test_pmaskmovd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003403; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003404; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:?]
3405; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:?]
3406; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3407; ZNVER1-NEXT: retq # sched: [1:0.50]
3408 %1 = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1)
3409 call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
3410 ret <4 x i32> %1
3411}
3412declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
3413declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
3414
; test_pmaskmovd_ymm: scheduling-annotation test for the 256-bit
; llvm.x86.avx2.maskload.d.256 / llvm.x86.avx2.maskstore.d.256 intrinsics.
; maskload.d.256 is called with (%a0, %a1) and maskstore.d.256 with
; (%a0, %a1, %a2) on the same pointer; the loaded vector is the return value.
; NOTE(review): %a1 is presumably the mask operand of both intrinsics --
; confirm against the intrinsic definitions.
; Expected assembly: a vpmaskmovd whose first operand is (%rdi), a vpmaskmovd
; whose last operand is (%rdi), then a vmovdqa from %ymm2 to %ymm0 and retq.
; The per-CPU assertion lines are autogenerated (see the NOTE on the first line
; of this file) and record the "# sched: [..]" text llc prints under
; -print-schedule; regenerate them instead of editing them by hand.
; NOTE(review): the znver1 expectations show "[100:?]" for both vpmaskmovd
; forms, which looks like a placeholder for a missing scheduling entry --
; confirm.
3415define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
3416; GENERIC-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003417; GENERIC: # %bb.0:
Simon Pilgrim809c0242017-12-06 18:24:48 +00003418; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [4:0.50]
3419; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003420; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3421; GENERIC-NEXT: retq # sched: [1:1.00]
3422;
3423; HASWELL-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003424; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00003425; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
3426; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003427; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003428; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003429;
Gadi Haber85d99b42017-10-17 13:45:39 +00003430; BROADWELL-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003431; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003432; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3433; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003434; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00003435; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003436;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003437; SKYLAKE-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003438; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003439; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3440; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003441; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003442; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003443;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003444; SKX-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003445; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003446; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3447; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003448; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003449; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003450;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003451; ZNVER1-LABEL: test_pmaskmovd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003452; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003453; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:?]
3454; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:?]
3455; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3456; ZNVER1-NEXT: retq # sched: [1:0.50]
3457 %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1)
3458 call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
3459 ret <8 x i32> %1
3460}
3461declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
3462declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
3463
; test_pmaskmovq: scheduling-annotation test for the 128-bit
; llvm.x86.avx2.maskload.q / llvm.x86.avx2.maskstore.q intrinsics.
; maskload.q is called with (%a0, %a1) and maskstore.q with (%a0, %a1, %a2) on
; the same pointer; the loaded vector is the return value.
; NOTE(review): %a1 is presumably the mask operand of both intrinsics --
; confirm against the intrinsic definitions.
; Expected assembly: a vpmaskmovq whose first operand is (%rdi), a vpmaskmovq
; whose last operand is (%rdi), then a vmovdqa from %xmm2 to %xmm0 and retq.
; The per-CPU assertion lines are autogenerated (see the NOTE on the first line
; of this file) and record the "# sched: [..]" text llc prints under
; -print-schedule; regenerate them instead of editing them by hand.
; NOTE(review): in the znver1 expectations only the store form shows
; "[100:?]" (the load form shows "[8:1.00]"); "[100:?]" looks like a
; placeholder for a missing scheduling entry -- confirm.
3464define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
3465; GENERIC-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003466; GENERIC: # %bb.0:
Simon Pilgrim809c0242017-12-06 18:24:48 +00003467; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [4:0.50]
3468; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003469; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
3470; GENERIC-NEXT: retq # sched: [1:1.00]
3471;
3472; HASWELL-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003473; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00003474; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
3475; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003476; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003477; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003478;
Gadi Haber85d99b42017-10-17 13:45:39 +00003479; BROADWELL-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003480; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003481; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3482; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003483; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00003484; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003485;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003486; SKYLAKE-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003487; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003488; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3489; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003490; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003491; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003492;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003493; SKX-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003494; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003495; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3496; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003497; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003498; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003499;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003500; ZNVER1-LABEL: test_pmaskmovq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003501; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003502; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
3503; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:?]
3504; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3505; ZNVER1-NEXT: retq # sched: [1:0.50]
3506 %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1)
3507 call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
3508 ret <2 x i64> %1
3509}
3510declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
3511declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
3512
; test_pmaskmovq_ymm: scheduling-annotation test for the 256-bit
; llvm.x86.avx2.maskload.q.256 / llvm.x86.avx2.maskstore.q.256 intrinsics.
; maskload.q.256 is called with (%a0, %a1) and maskstore.q.256 with
; (%a0, %a1, %a2) on the same pointer; the loaded vector is the return value.
; NOTE(review): %a1 is presumably the mask operand of both intrinsics --
; confirm against the intrinsic definitions.
; Expected assembly: a vpmaskmovq whose first operand is (%rdi), a vpmaskmovq
; whose last operand is (%rdi), then a vmovdqa from %ymm2 to %ymm0 and retq.
; The per-CPU assertion lines are autogenerated (see the NOTE on the first line
; of this file) and record the "# sched: [..]" text llc prints under
; -print-schedule; regenerate them instead of editing them by hand.
; NOTE(review): in the znver1 expectations only the store form shows
; "[100:?]" (the load form shows "[9:1.50]"); "[100:?]" looks like a
; placeholder for a missing scheduling entry -- confirm.
3513define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
3514; GENERIC-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003515; GENERIC: # %bb.0:
Simon Pilgrim809c0242017-12-06 18:24:48 +00003516; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [4:0.50]
3517; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003518; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3519; GENERIC-NEXT: retq # sched: [1:1.00]
3520;
3521; HASWELL-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003522; HASWELL: # %bb.0:
Gadi Haber2cf601f2017-12-08 09:48:44 +00003523; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
3524; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003525; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003526; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003527;
Gadi Haber85d99b42017-10-17 13:45:39 +00003528; BROADWELL-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003529; BROADWELL: # %bb.0:
Gadi Haber323f2e12017-10-24 20:19:47 +00003530; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3531; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003532; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00003533; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003534;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003535; SKYLAKE-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003536; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003537; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3538; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003539; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003540; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003541;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003542; SKX-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003543; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003544; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3545; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
Craig Topper391c6f92017-12-10 01:24:08 +00003546; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00003547; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003548;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003549; ZNVER1-LABEL: test_pmaskmovq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003550; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00003551; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50]
3552; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:?]
3553; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3554; ZNVER1-NEXT: retq # sched: [1:0.50]
3555 %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1)
3556 call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
3557 ret <4 x i64> %1
3558}
3559declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
3560declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
3561
; test_pmaxsb: scheduling-annotation test for the llvm.x86.avx2.pmaxs.b
; intrinsic. The intrinsic is called once on the two <32 x i8> arguments and
; once on that result plus a vector loaded from %a2 (align 32), so the
; expected assembly has a vpmaxsb with two register sources followed by a
; vpmaxsb whose first operand is (%rdi).
; The per-CPU assertion lines are autogenerated (see the NOTE on the first line
; of this file) and record the "# sched: [..]" text llc prints under
; -print-schedule; regenerate them instead of editing them by hand.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003562define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3563; GENERIC-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003564; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003565; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3566; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3567; GENERIC-NEXT: retq # sched: [1:1.00]
3568;
3569; HASWELL-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003570; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003571; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003572; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3573; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003574;
Gadi Haber85d99b42017-10-17 13:45:39 +00003575; BROADWELL-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003576; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003577; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003578; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3579; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003580;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003581; SKYLAKE-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003582; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003583; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3584; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3585; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003586;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003587; SKX-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003588; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003589; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3590; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3591; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003592;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003593; ZNVER1-LABEL: test_pmaxsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003594; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003595; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3596; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3597; ZNVER1-NEXT: retq # sched: [1:0.50]
3598 %1 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
3599 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3600 %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2)
3601 ret <32 x i8> %3
3602}
3603declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
3604
; test_pmaxsd: applies @llvm.x86.avx2.pmaxs.d to %a0 and %a1, then to that
; result and a <8 x i32> loaded from %a2, so the expected assembly contains
; vpmaxsd in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3605define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3606; GENERIC-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003607; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003608; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3609; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3610; GENERIC-NEXT: retq # sched: [1:1.00]
3611;
3612; HASWELL-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003613; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003614; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003615; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3616; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003617;
Gadi Haber85d99b42017-10-17 13:45:39 +00003618; BROADWELL-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003619; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003620; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003621; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3622; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003623;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003624; SKYLAKE-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003625; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003626; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3627; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3628; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003629;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003630; SKX-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003631; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003632; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3633; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3634; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003635;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003636; ZNVER1-LABEL: test_pmaxsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003637; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003638; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3639; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3640; ZNVER1-NEXT: retq # sched: [1:0.50]
3641 %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
3642 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3643 %3 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2)
3644 ret <8 x i32> %3
3645}
3646declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
3647
; test_pmaxsw: applies @llvm.x86.avx2.pmaxs.w to %a0 and %a1, then to that
; result and a <16 x i16> loaded from %a2, so the expected assembly contains
; vpmaxsw in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3648define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3649; GENERIC-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003650; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003651; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3652; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3653; GENERIC-NEXT: retq # sched: [1:1.00]
3654;
3655; HASWELL-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003656; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003657; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003658; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3659; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003660;
Gadi Haber85d99b42017-10-17 13:45:39 +00003661; BROADWELL-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003662; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003663; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003664; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3665; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003666;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003667; SKYLAKE-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003668; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003669; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3670; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3671; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003672;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003673; SKX-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003674; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003675; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3676; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3677; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003678;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003679; ZNVER1-LABEL: test_pmaxsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003680; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003681; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3682; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3683; ZNVER1-NEXT: retq # sched: [1:0.50]
3684 %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
3685 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3686 %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2)
3687 ret <16 x i16> %3
3688}
3689declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
3690
; test_pmaxub: applies @llvm.x86.avx2.pmaxu.b to %a0 and %a1, then to that
; result and a <32 x i8> loaded from %a2, so the expected assembly contains
; vpmaxub in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3691define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3692; GENERIC-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003693; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003694; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3695; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3696; GENERIC-NEXT: retq # sched: [1:1.00]
3697;
3698; HASWELL-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003699; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003700; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003701; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3702; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003703;
Gadi Haber85d99b42017-10-17 13:45:39 +00003704; BROADWELL-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003705; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003706; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003707; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3708; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003709;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003710; SKYLAKE-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003711; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003712; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3713; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3714; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003715;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003716; SKX-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003717; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003718; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3719; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3720; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003721;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003722; ZNVER1-LABEL: test_pmaxub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003723; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003724; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3725; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3726; ZNVER1-NEXT: retq # sched: [1:0.50]
3727 %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
3728 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3729 %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2)
3730 ret <32 x i8> %3
3731}
3732declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
3733
; test_pmaxud: applies @llvm.x86.avx2.pmaxu.d to %a0 and %a1, then to that
; result and a <8 x i32> loaded from %a2, so the expected assembly contains
; vpmaxud in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3734define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3735; GENERIC-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003736; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003737; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3738; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3739; GENERIC-NEXT: retq # sched: [1:1.00]
3740;
3741; HASWELL-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003742; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003743; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003744; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3745; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003746;
Gadi Haber85d99b42017-10-17 13:45:39 +00003747; BROADWELL-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003748; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003749; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003750; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3751; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003752;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003753; SKYLAKE-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003754; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003755; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3756; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3757; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003758;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003759; SKX-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003760; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003761; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3762; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3763; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003764;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003765; ZNVER1-LABEL: test_pmaxud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003766; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003767; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3768; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3769; ZNVER1-NEXT: retq # sched: [1:0.50]
3770 %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
3771 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3772 %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2)
3773 ret <8 x i32> %3
3774}
3775declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
3776
; test_pmaxuw: applies @llvm.x86.avx2.pmaxu.w to %a0 and %a1, then to that
; result and a <16 x i16> loaded from %a2, so the expected assembly contains
; vpmaxuw in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3777define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3778; GENERIC-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003779; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003780; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3781; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3782; GENERIC-NEXT: retq # sched: [1:1.00]
3783;
3784; HASWELL-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003785; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003786; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003787; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3788; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003789;
Gadi Haber85d99b42017-10-17 13:45:39 +00003790; BROADWELL-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003791; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003792; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003793; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3794; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003795;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003796; SKYLAKE-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003797; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003798; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3799; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3800; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003801;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003802; SKX-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003803; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003804; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3805; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3806; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003807;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003808; ZNVER1-LABEL: test_pmaxuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003809; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003810; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3811; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3812; ZNVER1-NEXT: retq # sched: [1:0.50]
3813 %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
3814 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3815 %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2)
3816 ret <16 x i16> %3
3817}
3818declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
3819
; test_pminsb: applies @llvm.x86.avx2.pmins.b to %a0 and %a1, then to that
; result and a <32 x i8> loaded from %a2, so the expected assembly contains
; vpminsb in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3820define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3821; GENERIC-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003822; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003823; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3824; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3825; GENERIC-NEXT: retq # sched: [1:1.00]
3826;
3827; HASWELL-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003828; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003829; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003830; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3831; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003832;
Gadi Haber85d99b42017-10-17 13:45:39 +00003833; BROADWELL-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003834; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003835; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003836; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3837; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003838;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003839; SKYLAKE-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003840; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003841; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3842; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3843; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003844;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003845; SKX-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003846; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003847; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3848; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3849; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003850;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003851; ZNVER1-LABEL: test_pminsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003852; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003853; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3854; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3855; ZNVER1-NEXT: retq # sched: [1:0.50]
3856 %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
3857 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3858 %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2)
3859 ret <32 x i8> %3
3860}
3861declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
3862
; test_pminsd: applies @llvm.x86.avx2.pmins.d to %a0 and %a1, then to that
; result and a <8 x i32> loaded from %a2, so the expected assembly contains
; vpminsd in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3863define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3864; GENERIC-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003865; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003866; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3867; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3868; GENERIC-NEXT: retq # sched: [1:1.00]
3869;
3870; HASWELL-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003871; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003872; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003873; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3874; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003875;
Gadi Haber85d99b42017-10-17 13:45:39 +00003876; BROADWELL-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003877; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003878; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003879; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3880; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003881;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003882; SKYLAKE-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003883; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003884; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3885; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3886; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003887;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003888; SKX-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003889; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003890; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3891; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3892; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003893;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003894; ZNVER1-LABEL: test_pminsd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003895; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003896; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3897; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3898; ZNVER1-NEXT: retq # sched: [1:0.50]
3899 %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
3900 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3901 %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2)
3902 ret <8 x i32> %3
3903}
3904declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
3905
; test_pminsw: applies @llvm.x86.avx2.pmins.w to %a0 and %a1, then to that
; result and a <16 x i16> loaded from %a2, so the expected assembly contains
; vpminsw in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3906define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3907; GENERIC-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003908; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003909; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3910; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3911; GENERIC-NEXT: retq # sched: [1:1.00]
3912;
3913; HASWELL-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003914; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003915; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003916; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3917; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003918;
Gadi Haber85d99b42017-10-17 13:45:39 +00003919; BROADWELL-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003920; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003921; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003922; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3923; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003924;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003925; SKYLAKE-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003926; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003927; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3928; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3929; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003930;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003931; SKX-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003932; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003933; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3934; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3935; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003936;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003937; ZNVER1-LABEL: test_pminsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003938; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003939; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3940; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3941; ZNVER1-NEXT: retq # sched: [1:0.50]
3942 %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
3943 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3944 %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2)
3945 ret <16 x i16> %3
3946}
3947declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
3948
; test_pminub: applies @llvm.x86.avx2.pminu.b to %a0 and %a1, then to that
; result and a <32 x i8> loaded from %a2, so the expected assembly contains
; vpminub in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3949define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3950; GENERIC-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003951; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003952; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3953; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3954; GENERIC-NEXT: retq # sched: [1:1.00]
3955;
3956; HASWELL-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003957; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003958; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00003959; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3960; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003961;
Gadi Haber85d99b42017-10-17 13:45:39 +00003962; BROADWELL-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003963; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00003964; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00003965; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3966; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00003967;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003968; SKYLAKE-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003969; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00003970; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3971; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3972; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003973;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003974; SKX-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003975; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00003976; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3977; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3978; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00003979;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003980; ZNVER1-LABEL: test_pminub:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003981; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003982; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3983; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3984; ZNVER1-NEXT: retq # sched: [1:0.50]
3985 %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
3986 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3987 %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2)
3988 ret <32 x i8> %3
3989}
3990declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
3991
; test_pminud: applies @llvm.x86.avx2.pminu.d to %a0 and %a1, then to that
; result and a <8 x i32> loaded from %a2, so the expected assembly contains
; vpminud in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
3992define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3993; GENERIC-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00003994; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00003995; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3996; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3997; GENERIC-NEXT: retq # sched: [1:1.00]
3998;
3999; HASWELL-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004000; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004001; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004002; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4003; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004004;
Gadi Haber85d99b42017-10-17 13:45:39 +00004005; BROADWELL-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004006; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004007; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004008; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4009; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004010;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004011; SKYLAKE-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004012; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004013; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4014; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4015; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004016;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004017; SKX-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004018; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00004019; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4020; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4021; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004022;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004023; ZNVER1-LABEL: test_pminud:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004024; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004025; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4026; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4027; ZNVER1-NEXT: retq # sched: [1:0.50]
4028 %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
4029 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4030 %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2)
4031 ret <8 x i32> %3
4032}
4033declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
4034
; test_pminuw: applies @llvm.x86.avx2.pminu.w to %a0 and %a1, then to that
; result and a <16 x i16> loaded from %a2, so the expected assembly contains
; vpminuw in both its register and its (%rdi) memory-operand form.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
4035define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4036; GENERIC-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004037; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004038; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4039; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
4040; GENERIC-NEXT: retq # sched: [1:1.00]
4041;
4042; HASWELL-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004043; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004044; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004045; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4046; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004047;
Gadi Haber85d99b42017-10-17 13:45:39 +00004048; BROADWELL-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004049; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004050; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004051; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4052; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004053;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004054; SKYLAKE-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004055; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004056; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4057; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4058; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004059;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004060; SKX-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004061; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00004062; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4063; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4064; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004065;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004066; ZNVER1-LABEL: test_pminuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004067; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004068; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4069; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4070; ZNVER1-NEXT: retq # sched: [1:0.50]
4071 %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
4072 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4073 %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2)
4074 ret <16 x i16> %3
4075}
4076declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
4077
; test_pmovmskb: passes the <32 x i8> argument %a0 to @llvm.x86.avx2.pmovmskb
; and returns the i32 result. Unlike the neighbouring min/max tests there is no
; load, so only the register form is covered; the expected assembly is
; vpmovmskb %ymm0, %eax followed by vzeroupper and retq.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; note at the top of this file) and pin the `sched: [..]` annotation printed
; under -print-schedule for each -mcpu; regenerate rather than hand-edit them.
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004078define i32 @test_pmovmskb(<32 x i8> %a0) {
4079; GENERIC-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004080; GENERIC: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004081; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [1:1.00]
Simon Pilgrim4ff43d82017-12-10 13:41:29 +00004082; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004083; GENERIC-NEXT: retq # sched: [1:1.00]
4084;
4085; HASWELL-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004086; HASWELL: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004087; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4088; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004089; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004090;
Gadi Haber85d99b42017-10-17 13:45:39 +00004091; BROADWELL-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004092; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004093; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4094; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004095; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004096;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004097; SKYLAKE-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004098; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004099; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004100; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004101; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004102;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004103; SKX-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004104; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004105; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4106; SKX-NEXT: vzeroupper # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004107; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004108;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004109; ZNVER1-LABEL: test_pmovmskb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004110; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00004111; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4112; ZNVER1-NEXT: vzeroupper # sched: [100:?]
4113; ZNVER1-NEXT: retq # sched: [1:0.50]
4114 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
4115 ret i32 %1
4116}
4117declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
4118
; Scheduling test for vpmovsxbd: sign-extends the low 8 bytes of %a0 and of the
; <16 x i8> loaded from %a1 to <8 x i32> and adds the two results, so the
; expected assembly has both the register form and the (%rdi) memory form of
; the instruction, followed by vpaddd. The ZNVER1 checks expect the memory form
; first; the other configurations expect the register form first.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004119define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4120; GENERIC-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004121; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004122; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
4123; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [5:1.00]
4124; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4125; GENERIC-NEXT: retq # sched: [1:1.00]
4126;
4127; HASWELL-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004128; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004129; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004130; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004131; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004132; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004133;
Gadi Haber85d99b42017-10-17 13:45:39 +00004134; BROADWELL-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004135; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004136; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004137; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004138; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004139; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004140;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004141; SKYLAKE-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004142; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004143; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004144; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4145; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4146; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004147;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004148; SKX-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004149; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004150; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004151; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4152; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4153; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004154;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004155; ZNVER1-LABEL: test_pmovsxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004156; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004157; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50]
4158; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.25]
4159; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4160; ZNVER1-NEXT: retq # sched: [1:0.50]
4161 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4162 %2 = sext <8 x i8> %1 to <8 x i32>
4163 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4164 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4165 %5 = sext <8 x i8> %4 to <8 x i32>
4166 %6 = add <8 x i32> %2, %5
4167 ret <8 x i32> %6
4168}
4169
; Scheduling test for vpmovsxbq: sign-extends the low 4 bytes of %a0 and of the
; <16 x i8> loaded from %a1 to <4 x i64> and adds the two results, so the
; expected assembly has both the register form and the (%rdi) memory form of
; the instruction, followed by vpaddq. The ZNVER1 checks expect the memory form
; first; the other configurations expect the register form first.
4170define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4171; GENERIC-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004172; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004173; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
4174; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [5:1.00]
4175; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4176; GENERIC-NEXT: retq # sched: [1:1.00]
4177;
4178; HASWELL-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004179; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004180; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004181; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004182; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004183; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004184;
Gadi Haber85d99b42017-10-17 13:45:39 +00004185; BROADWELL-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004186; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004187; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004188; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004189; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004190; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004191;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004192; SKYLAKE-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004193; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004194; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004195; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4196; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4197; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004198;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004199; SKX-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004200; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004201; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004202; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4203; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4204; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004205;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004206; ZNVER1-LABEL: test_pmovsxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004207; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004208; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50]
4209; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50]
4210; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4211; ZNVER1-NEXT: retq # sched: [1:0.50]
4212 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4213 %2 = sext <4 x i8> %1 to <4 x i64>
4214 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4215 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4216 %5 = sext <4 x i8> %4 to <4 x i64>
4217 %6 = add <4 x i64> %2, %5
4218 ret <4 x i64> %6
4219}
4220
; Scheduling test for vpmovsxbw: sign-extends all 16 bytes of %a0 and of the
; <16 x i8> loaded from %a1 to <16 x i16> and adds the two results, so the
; expected assembly has both the register form and the (%rdi) memory form of
; the instruction, followed by vpaddw. The ZNVER1 checks expect the memory form
; first; the other configurations expect the register form first.
4221define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4222; GENERIC-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004223; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004224; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
4225; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [5:1.00]
4226; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4227; GENERIC-NEXT: retq # sched: [1:1.00]
4228;
4229; HASWELL-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004230; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004231; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004232; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004233; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004234; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004235;
Gadi Haber85d99b42017-10-17 13:45:39 +00004236; BROADWELL-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004237; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004238; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004239; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004240; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004241; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004242;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004243; SKYLAKE-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004244; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004245; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004246; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4247; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4248; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004249;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004250; SKX-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004251; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004252; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004253; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4254; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4255; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004256;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004257; ZNVER1-LABEL: test_pmovsxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004258; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004259; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50]
4260; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50]
4261; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4262; ZNVER1-NEXT: retq # sched: [1:0.50]
4263 %1 = sext <16 x i8> %a0 to <16 x i16>
4264 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4265 %3 = sext <16 x i8> %2 to <16 x i16>
4266 %4 = add <16 x i16> %1, %3
4267 ret <16 x i16> %4
4268}
4269
; Scheduling test for vpmovsxdq: sign-extends the four i32 elements of %a0 and
; of the <4 x i32> loaded from %a1 to <4 x i64> and adds the two results, so the
; expected assembly has both the register form and the (%rdi) memory form of
; the instruction, followed by vpaddq. The ZNVER1 checks expect the memory form
; first; the other configurations expect the register form first.
4270define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4271; GENERIC-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004272; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004273; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
4274; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [5:1.00]
4275; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4276; GENERIC-NEXT: retq # sched: [1:1.00]
4277;
4278; HASWELL-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004279; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004280; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004281; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004282; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004283; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004284;
Gadi Haber85d99b42017-10-17 13:45:39 +00004285; BROADWELL-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004286; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004287; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004288; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004289; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004290; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004291;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004292; SKYLAKE-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004293; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004294; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004295; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4296; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4297; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004298;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004299; SKX-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004300; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004301; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004302; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4303; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4304; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004305;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004306; ZNVER1-LABEL: test_pmovsxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004307; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004308; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50]
4309; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50]
4310; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4311; ZNVER1-NEXT: retq # sched: [1:0.50]
4312 %1 = sext <4 x i32> %a0 to <4 x i64>
4313 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4314 %3 = sext <4 x i32> %2 to <4 x i64>
4315 %4 = add <4 x i64> %1, %3
4316 ret <4 x i64> %4
4317}
4318
; Scheduling test for vpmovsxwd: sign-extends the eight i16 elements of %a0 and
; of the <8 x i16> loaded from %a1 to <8 x i32> and adds the two results, so the
; expected assembly has both the register form and the (%rdi) memory form of
; the instruction, followed by vpaddd. The ZNVER1 checks expect the memory form
; first; the other configurations expect the register form first.
4319define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4320; GENERIC-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004321; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004322; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
4323; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [5:1.00]
4324; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4325; GENERIC-NEXT: retq # sched: [1:1.00]
4326;
4327; HASWELL-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004328; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004329; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004330; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004331; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004332; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004333;
Gadi Haber85d99b42017-10-17 13:45:39 +00004334; BROADWELL-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004335; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004336; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004337; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004338; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004339; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004340;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004341; SKYLAKE-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004342; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004343; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004344; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4345; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4346; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004347;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004348; SKX-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004349; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004350; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004351; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4352; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4353; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004354;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004355; ZNVER1-LABEL: test_pmovsxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004356; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004357; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50]
4358; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.25]
4359; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4360; ZNVER1-NEXT: retq # sched: [1:0.50]
4361 %1 = sext <8 x i16> %a0 to <8 x i32>
4362 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4363 %3 = sext <8 x i16> %2 to <8 x i32>
4364 %4 = add <8 x i32> %1, %3
4365 ret <8 x i32> %4
4366}
4367
; Scheduling test for vpmovsxwq: sign-extends the low four i16 elements of %a0
; and of the <8 x i16> loaded from %a1 to <4 x i64> and adds the two results, so
; the expected assembly has both the register form and the (%rdi) memory form
; of the instruction, followed by vpaddq. The ZNVER1 checks expect the memory
; form first; the other configurations expect the register form first.
4368define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4369; GENERIC-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004370; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004371; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
4372; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [5:1.00]
4373; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4374; GENERIC-NEXT: retq # sched: [1:1.00]
4375;
4376; HASWELL-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004377; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004378; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004379; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004380; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004381; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004382;
Gadi Haber85d99b42017-10-17 13:45:39 +00004383; BROADWELL-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004384; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004385; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004386; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004387; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004388; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004389;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004390; SKYLAKE-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004391; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004392; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004393; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4394; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4395; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004396;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004397; SKX-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004398; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004399; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004400; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4401; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4402; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004403;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004404; ZNVER1-LABEL: test_pmovsxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004405; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004406; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50]
4407; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.25]
4408; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4409; ZNVER1-NEXT: retq # sched: [1:0.50]
4410 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4411 %2 = sext <4 x i16> %1 to <4 x i64>
4412 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4413 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4414 %5 = sext <4 x i16> %4 to <4 x i64>
4415 %6 = add <4 x i64> %2, %5
4416 ret <4 x i64> %6
4417}
4418
; Scheduling test for vpmovzxbd: zero-extends the low 8 bytes of %a0 and of the
; <16 x i8> loaded from %a1 to <8 x i32> and adds the two results with vpaddd.
; The vpmovzxbd check lines skip the operand text with the {{.*#+}} regex and
; match the printed element layout instead ("ymm0 = xmm0[0],zero,..." for the
; register form, "ymm1 = mem[0],zero,..." for the memory form). The ZNVER1
; checks expect the memory form first; the other configurations expect the
; register form first.
4419define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4420; GENERIC-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004421; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004422; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
4423; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
4424; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4425; GENERIC-NEXT: retq # sched: [1:1.00]
4426;
4427; HASWELL-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004428; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004429; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004430; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004431; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004432; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004433;
Gadi Haber85d99b42017-10-17 13:45:39 +00004434; BROADWELL-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004435; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004436; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004437; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004438; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004439; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004440;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004441; SKYLAKE-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004442; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004443; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004444; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4445; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4446; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004447;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004448; SKX-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004449; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004450; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004451; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4452; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4453; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004454;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004455; ZNVER1-LABEL: test_pmovzxbd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004456; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004457; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50]
4458; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.25]
4459; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4460; ZNVER1-NEXT: retq # sched: [1:0.50]
4461 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4462 %2 = zext <8 x i8> %1 to <8 x i32>
4463 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4464 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4465 %5 = zext <8 x i8> %4 to <8 x i32>
4466 %6 = add <8 x i32> %2, %5
4467 ret <8 x i32> %6
4468}
4469
; Scheduling test for vpmovzxbq: zero-extends the low 4 bytes of %a0 and of the
; <16 x i8> loaded from %a1 to <4 x i64> and adds the two results with vpaddq.
; The vpmovzxbq check lines skip the operand text with the {{.*#+}} regex and
; match the printed element layout instead ("ymm0 = xmm0[0],zero,..." for the
; register form, "ymm1 = mem[0],zero,..." for the memory form). The ZNVER1
; checks expect the memory form first; the other configurations expect the
; register form first.
4470define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4471; GENERIC-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004472; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004473; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
4474; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
4475; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4476; GENERIC-NEXT: retq # sched: [1:1.00]
4477;
4478; HASWELL-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004479; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004480; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004481; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004482; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004483; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004484;
Gadi Haber85d99b42017-10-17 13:45:39 +00004485; BROADWELL-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004486; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004487; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004488; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004489; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004490; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004491;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004492; SKYLAKE-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004493; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004494; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004495; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4496; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4497; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004498;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004499; SKX-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004500; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004501; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004502; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4503; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4504; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004505;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004506; ZNVER1-LABEL: test_pmovzxbq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004507; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004508; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
4509; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
4510; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4511; ZNVER1-NEXT: retq # sched: [1:0.50]
4512 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4513 %2 = zext <4 x i8> %1 to <4 x i64>
4514 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4515 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4516 %5 = zext <4 x i8> %4 to <4 x i64>
4517 %6 = add <4 x i64> %2, %5
4518 ret <4 x i64> %6
4519}
4520
; test_pmovzxbw: zero-extends %a0 from <16 x i8> to <16 x i16>, does the same
; for a <16 x i8> loaded from %a1, and adds the two results. The checks expect
; one register-source and one memory-source vpmovzxbw followed by vpaddw, each
; with the `sched: [..]` annotation printed for that CPU prefix. ZNVER1 expects
; the memory-source form first. The check lines are autogenerated (see the
; NOTE at the top of the file); regenerate them rather than editing by hand.
4521define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4522; GENERIC-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004523; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004524; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4525; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
4526; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4527; GENERIC-NEXT: retq # sched: [1:1.00]
4528;
4529; HASWELL-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004530; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004531; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004532; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004533; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004534; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004535;
Gadi Haber85d99b42017-10-17 13:45:39 +00004536; BROADWELL-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004537; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004538; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004539; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004540; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004541; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004542;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004543; SKYLAKE-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004544; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004545; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004546; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4547; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4548; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004549;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004550; SKX-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004551; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004552; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004553; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4554; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4555; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004556;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004557; ZNVER1-LABEL: test_pmovzxbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004558; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004559; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50]
4560; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50]
4561; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4562; ZNVER1-NEXT: retq # sched: [1:0.50]
4563 %1 = zext <16 x i8> %a0 to <16 x i16>
4564 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4565 %3 = zext <16 x i8> %2 to <16 x i16>
4566 %4 = add <16 x i16> %1, %3
4567 ret <16 x i16> %4
4568}
4569
; test_pmovzxdq: zero-extends %a0 from <4 x i32> to <4 x i64>, does the same
; for a <4 x i32> loaded from %a1, and adds the two results. The checks expect
; one register-source and one memory-source vpmovzxdq followed by vpaddq, each
; with the `sched: [..]` annotation printed for that CPU prefix. ZNVER1 expects
; the memory-source form first. The check lines are autogenerated (see the
; NOTE at the top of the file); regenerate them rather than editing by hand.
4570define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4571; GENERIC-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004572; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004573; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
4574; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
4575; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4576; GENERIC-NEXT: retq # sched: [1:1.00]
4577;
4578; HASWELL-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004579; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004580; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004581; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004582; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004583; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004584;
Gadi Haber85d99b42017-10-17 13:45:39 +00004585; BROADWELL-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004586; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004587; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004588; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004589; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004590; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004591;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004592; SKYLAKE-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004593; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004594; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004595; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4596; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4597; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004598;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004599; SKX-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004600; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004601; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004602; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4603; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4604; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004605;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004606; ZNVER1-LABEL: test_pmovzxdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004607; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004608; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
4609; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
4610; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4611; ZNVER1-NEXT: retq # sched: [1:0.50]
4612 %1 = zext <4 x i32> %a0 to <4 x i64>
4613 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4614 %3 = zext <4 x i32> %2 to <4 x i64>
4615 %4 = add <4 x i64> %1, %3
4616 ret <4 x i64> %4
4617}
4618
; test_pmovzxwd: zero-extends %a0 from <8 x i16> to <8 x i32>, does the same
; for an <8 x i16> loaded from %a1, and adds the two results. The checks expect
; one register-source and one memory-source vpmovzxwd followed by vpaddd, each
; with the `sched: [..]` annotation printed for that CPU prefix. ZNVER1 expects
; the memory-source form first. The check lines are autogenerated (see the
; NOTE at the top of the file); regenerate them rather than editing by hand.
4619define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4620; GENERIC-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004621; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004622; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
4623; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
4624; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4625; GENERIC-NEXT: retq # sched: [1:1.00]
4626;
4627; HASWELL-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004628; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004629; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004630; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004631; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004632; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004633;
Gadi Haber85d99b42017-10-17 13:45:39 +00004634; BROADWELL-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004635; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004636; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004637; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004638; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004639; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004640;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004641; SKYLAKE-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004642; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004643; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004644; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4645; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4646; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004647;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004648; SKX-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004649; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004650; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004651; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4652; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4653; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004654;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004655; ZNVER1-LABEL: test_pmovzxwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004656; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004657; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
4658; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
4659; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4660; ZNVER1-NEXT: retq # sched: [1:0.50]
4661 %1 = zext <8 x i16> %a0 to <8 x i32>
4662 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4663 %3 = zext <8 x i16> %2 to <8 x i32>
4664 %4 = add <8 x i32> %1, %3
4665 ret <8 x i32> %4
4666}
4667
; test_pmovzxwq: takes elements 0..3 of %a0 (<8 x i16>) via shufflevector and
; zero-extends them to <4 x i64>, does the same for an <8 x i16> loaded from
; %a1, and adds the two results. The checks expect one register-source and one
; memory-source vpmovzxwq followed by vpaddq, each with the `sched: [..]`
; annotation printed for that CPU prefix. ZNVER1 expects the memory-source
; form first. The check lines are autogenerated (see the NOTE at the top of
; the file); regenerate them rather than editing by hand.
4668define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4669; GENERIC-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004670; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004671; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
4672; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
4673; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4674; GENERIC-NEXT: retq # sched: [1:1.00]
4675;
4676; HASWELL-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004677; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004678; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004679; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004680; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004681; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004682;
Gadi Haber85d99b42017-10-17 13:45:39 +00004683; BROADWELL-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004684; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004685; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004686; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004687; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00004688; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004689;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004690; SKYLAKE-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004691; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004692; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004693; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4694; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4695; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004696;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004697; SKX-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004698; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004699; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00004700; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4701; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4702; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004703;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004704; ZNVER1-LABEL: test_pmovzxwq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004705; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004706; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
4707; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
4708; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4709; ZNVER1-NEXT: retq # sched: [1:0.50]
4710 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4711 %2 = zext <4 x i16> %1 to <4 x i64>
4712 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4713 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4714 %5 = zext <4 x i16> %4 to <4 x i64>
4715 %6 = add <4 x i64> %2, %5
4716 ret <4 x i64> %6
4717}
4718
; test_pmuldq: calls @llvm.x86.avx2.pmul.dq on %a0 and %a1, bitcasts the
; <4 x i64> result back to <8 x i32>, and calls the intrinsic again with an
; <8 x i32> loaded from %a2. The checks expect a register-register vpmuldq
; followed by a vpmuldq with a (%rdi) memory operand, each with the
; `sched: [..]` annotation printed for that CPU prefix. The check lines are
; autogenerated (see the NOTE at the top of the file); regenerate them rather
; than editing by hand.
4719define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4720; GENERIC-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004721; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004722; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4723; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4724; GENERIC-NEXT: retq # sched: [1:1.00]
4725;
4726; HASWELL-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004727; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004728; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004729; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4730; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004731;
Gadi Haber85d99b42017-10-17 13:45:39 +00004732; BROADWELL-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004733; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004734; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004735; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4736; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004737;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004738; SKYLAKE-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004739; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004740; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004741; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4742; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004743;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004744; SKX-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004745; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004746; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004747; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4748; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004749;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004750; ZNVER1-LABEL: test_pmuldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004751; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004752; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4753; ZNVER1-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4754; ZNVER1-NEXT: retq # sched: [1:0.50]
4755 %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
4756 %2 = bitcast <4 x i64> %1 to <8 x i32>
4757 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
4758 %4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3)
4759 ret <4 x i64> %4
4760}
4761declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
4762
; test_pmulhrsw: calls @llvm.x86.avx2.pmul.hr.sw on %a0 and %a1, then calls it
; again with that result and a <16 x i16> loaded from %a2. The checks expect a
; register-register vpmulhrsw followed by a vpmulhrsw with a (%rdi) memory
; operand, each with the `sched: [..]` annotation printed for that CPU prefix.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them rather than editing by hand.
4763define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4764; GENERIC-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004765; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004766; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4767; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4768; GENERIC-NEXT: retq # sched: [1:1.00]
4769;
4770; HASWELL-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004771; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004772; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004773; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4774; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004775;
Gadi Haber85d99b42017-10-17 13:45:39 +00004776; BROADWELL-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004777; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004778; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004779; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4780; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004781;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004782; SKYLAKE-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004783; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004784; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004785; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4786; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004787;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004788; SKX-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004789; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004790; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004791; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4792; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004793;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004794; ZNVER1-LABEL: test_pmulhrsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004795; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004796; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4797; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4798; ZNVER1-NEXT: retq # sched: [1:0.50]
4799 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
4800 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4801 %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2)
4802 ret <16 x i16> %3
4803}
4804declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
4805
; test_pmulhuw: calls @llvm.x86.avx2.pmulhu.w on %a0 and %a1, then calls it
; again with that result and a <16 x i16> loaded from %a2. The checks expect a
; register-register vpmulhuw followed by a vpmulhuw with a (%rdi) memory
; operand, each with the `sched: [..]` annotation printed for that CPU prefix.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them rather than editing by hand.
4806define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4807; GENERIC-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004808; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004809; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4810; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4811; GENERIC-NEXT: retq # sched: [1:1.00]
4812;
4813; HASWELL-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004814; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004815; HASWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004816; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4817; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004818;
Gadi Haber85d99b42017-10-17 13:45:39 +00004819; BROADWELL-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004820; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004821; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004822; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4823; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004824;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004825; SKYLAKE-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004826; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004827; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004828; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4829; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004830;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004831; SKX-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004832; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004833; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004834; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4835; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004836;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004837; ZNVER1-LABEL: test_pmulhuw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004838; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004839; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4840; ZNVER1-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4841; ZNVER1-NEXT: retq # sched: [1:0.50]
4842 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
4843 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4844 %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2)
4845 ret <16 x i16> %3
4846}
4847declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
4848
; test_pmulhw: calls @llvm.x86.avx2.pmulh.w on %a0 and %a1, then calls it
; again with that result and a <16 x i16> loaded from %a2. The checks expect a
; register-register vpmulhw followed by a vpmulhw with a (%rdi) memory
; operand, each with the `sched: [..]` annotation printed for that CPU prefix.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them rather than editing by hand.
4849define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4850; GENERIC-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004851; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004852; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4853; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4854; GENERIC-NEXT: retq # sched: [1:1.00]
4855;
4856; HASWELL-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004857; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004858; HASWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004859; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4860; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004861;
Gadi Haber85d99b42017-10-17 13:45:39 +00004862; BROADWELL-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004863; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004864; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004865; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4866; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004867;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004868; SKYLAKE-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004869; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004870; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004871; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4872; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004873;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004874; SKX-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004875; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004876; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004877; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4878; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004879;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004880; ZNVER1-LABEL: test_pmulhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004881; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004882; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4883; ZNVER1-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4884; ZNVER1-NEXT: retq # sched: [1:0.50]
4885 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
4886 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4887 %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2)
4888 ret <16 x i16> %3
4889}
4890declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
4891
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004892define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00004893; GENERIC-LABEL: test_pmulld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004894; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00004895; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4896; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4897; GENERIC-NEXT: retq # sched: [1:1.00]
4898;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004899; HASWELL-LABEL: test_pmulld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004900; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004901; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004902; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:2.00]
4903; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004904;
Gadi Haber85d99b42017-10-17 13:45:39 +00004905; BROADWELL-LABEL: test_pmulld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004906; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004907; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004908; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
4909; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004910;
Gadi Haber767d98b2017-08-30 08:08:50 +00004911; SKYLAKE-LABEL: test_pmulld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004912; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004913; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004914; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
4915; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00004916;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004917; SKX-LABEL: test_pmulld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004918; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004919; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
Gadi Haber684944b2017-10-08 12:52:54 +00004920; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
4921; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004922;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004923; ZNVER1-LABEL: test_pmulld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004924; ZNVER1: # %bb.0:
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00004925; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
4926; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
4927; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004928 %1 = mul <8 x i32> %a0, %a1
4929 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4930 %3 = mul <8 x i32> %1, %2
4931 ret <8 x i32> %3
4932}
4933
4934define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00004935; GENERIC-LABEL: test_pmullw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004936; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00004937; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4938; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4939; GENERIC-NEXT: retq # sched: [1:1.00]
4940;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004941; HASWELL-LABEL: test_pmullw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004942; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004943; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004944; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4945; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004946;
Gadi Haber85d99b42017-10-17 13:45:39 +00004947; BROADWELL-LABEL: test_pmullw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004948; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004949; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004950; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4951; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004952;
Gadi Haber767d98b2017-08-30 08:08:50 +00004953; SKYLAKE-LABEL: test_pmullw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004954; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004955; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004956; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4957; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00004958;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004959; SKX-LABEL: test_pmullw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004960; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004961; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00004962; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4963; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00004964;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004965; ZNVER1-LABEL: test_pmullw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004966; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00004967; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4968; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00004969; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00004970 %1 = mul <16 x i16> %a0, %a1
4971 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4972 %3 = mul <16 x i16> %1, %2
4973 ret <16 x i16> %3
4974}
4975
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004976define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4977; GENERIC-LABEL: test_pmuludq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004978; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004979; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4980; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4981; GENERIC-NEXT: retq # sched: [1:1.00]
4982;
4983; HASWELL-LABEL: test_pmuludq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004984; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004985; HASWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00004986; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4987; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004988;
Gadi Haber85d99b42017-10-17 13:45:39 +00004989; BROADWELL-LABEL: test_pmuludq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004990; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00004991; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00004992; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4993; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00004994;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00004995; SKYLAKE-LABEL: test_pmuludq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00004996; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00004997; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00004998; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4999; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005000;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005001; SKX-LABEL: test_pmuludq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005002; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005003; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
Gadi Haber684944b2017-10-08 12:52:54 +00005004; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
5005; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005006;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005007; ZNVER1-LABEL: test_pmuludq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005008; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005009; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
5010; ZNVER1-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5011; ZNVER1-NEXT: retq # sched: [1:0.50]
5012 %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
5013 %2 = bitcast <4 x i64> %1 to <8 x i32>
5014 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
5015 %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3)
5016 ret <4 x i64> %4
5017}
5018declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
5019
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005020define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00005021; GENERIC-LABEL: test_por:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005022; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00005023; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5024; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5025; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5026; GENERIC-NEXT: retq # sched: [1:1.00]
5027;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005028; HASWELL-LABEL: test_por:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005029; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005030; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005031; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005032; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005033; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005034;
Gadi Haber85d99b42017-10-17 13:45:39 +00005035; BROADWELL-LABEL: test_por:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005036; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005037; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00005038; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
Gadi Haber85d99b42017-10-17 13:45:39 +00005039; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005040; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005041;
Gadi Haber767d98b2017-08-30 08:08:50 +00005042; SKYLAKE-LABEL: test_por:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005043; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005044; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5045; SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5046; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5047; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00005048;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005049; SKX-LABEL: test_por:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005050; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005051; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5052; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5053; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5054; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005055;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005056; ZNVER1-LABEL: test_por:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005057; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00005058; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5059; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5060; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00005061; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00005062 %1 = or <4 x i64> %a0, %a1
5063 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5064 %3 = or <4 x i64> %1, %2
5065 %4 = add <4 x i64> %3, %a1
5066 ret <4 x i64> %4
5067}
5068
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005069define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5070; GENERIC-LABEL: test_psadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005071; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005072; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5073; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5074; GENERIC-NEXT: retq # sched: [1:1.00]
5075;
5076; HASWELL-LABEL: test_psadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005077; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005078; HASWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005079; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
5080; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005081;
Gadi Haber85d99b42017-10-17 13:45:39 +00005082; BROADWELL-LABEL: test_psadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005083; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005084; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005085; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5086; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005087;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005088; SKYLAKE-LABEL: test_psadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005089; SKYLAKE: # %bb.0:
Gadi Haber6f8fbf42017-09-19 06:19:27 +00005090; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005091; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
5092; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005093;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005094; SKX-LABEL: test_psadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005095; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005096; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005097; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
5098; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005099;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005100; ZNVER1-LABEL: test_psadbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005101; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005102; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
5103; ZNVER1-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5104; ZNVER1-NEXT: retq # sched: [1:0.50]
5105 %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
5106 %2 = bitcast <4 x i64> %1 to <32 x i8>
5107 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
5108 %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3)
5109 ret <4 x i64> %4
5110}
5111declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
5112
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005113define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5114; GENERIC-LABEL: test_pshufb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005115; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005116; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5117; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5118; GENERIC-NEXT: retq # sched: [1:1.00]
5119;
5120; HASWELL-LABEL: test_pshufb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005121; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005122; HASWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005123; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5124; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005125;
Gadi Haber85d99b42017-10-17 13:45:39 +00005126; BROADWELL-LABEL: test_pshufb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005127; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005128; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005129; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5130; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005131;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005132; SKYLAKE-LABEL: test_pshufb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005133; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005134; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005135; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5136; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005137;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005138; SKX-LABEL: test_pshufb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005139; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005140; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005141; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5142; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005143;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005144; ZNVER1-LABEL: test_pshufb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005145; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005146; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5147; ZNVER1-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5148; ZNVER1-NEXT: retq # sched: [1:0.50]
5149 %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
5150 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5151 %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2)
5152 ret <32 x i8> %3
5153}
5154declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
5155
5156define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) {
5157; GENERIC-LABEL: test_pshufd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005158; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005159; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5160; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [5:1.00]
Craig Topperd4341922017-09-18 03:29:47 +00005161; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005162; GENERIC-NEXT: retq # sched: [1:1.00]
5163;
5164; HASWELL-LABEL: test_pshufd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005165; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005166; HASWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005167; HASWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
Craig Topperd4341922017-09-18 03:29:47 +00005168; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005169; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005170;
Gadi Haber85d99b42017-10-17 13:45:39 +00005171; BROADWELL-LABEL: test_pshufd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005172; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005173; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005174; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005175; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005176; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005177;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005178; SKYLAKE-LABEL: test_pshufd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005179; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005180; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005181; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5182; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5183; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005184;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005185; SKX-LABEL: test_pshufd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005186; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005187; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005188; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5189; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5190; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005191;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005192; ZNVER1-LABEL: test_pshufd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005193; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005194; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50]
5195; ZNVER1-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25]
Craig Topperd4341922017-09-18 03:29:47 +00005196; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005197; ZNVER1-NEXT: retq # sched: [1:0.50]
5198 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
5199 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
5200 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
Craig Topperd4341922017-09-18 03:29:47 +00005201 %4 = add <8 x i32> %1, %3
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005202 ret <8 x i32> %4
5203}
5204
5205define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) {
5206; GENERIC-LABEL: test_pshufhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005207; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005208; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5209; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [5:1.00]
5210; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5211; GENERIC-NEXT: retq # sched: [1:1.00]
5212;
5213; HASWELL-LABEL: test_pshufhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005214; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005215; HASWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005216; HASWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005217; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005218; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005219;
Gadi Haber85d99b42017-10-17 13:45:39 +00005220; BROADWELL-LABEL: test_pshufhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005221; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005222; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005223; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005224; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00005225; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005226;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005227; SKYLAKE-LABEL: test_pshufhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005228; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005229; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005230; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5231; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5232; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005233;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005234; SKX-LABEL: test_pshufhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005235; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005236; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005237; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5238; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5239; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005240;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005241; ZNVER1-LABEL: test_pshufhw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005242; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005243; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50]
5244; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25]
5245; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5246; ZNVER1-NEXT: retq # sched: [1:0.50]
5247 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
5248 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
5249 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
5250 %4 = or <16 x i16> %1, %3
5251 ret <16 x i16> %4
5252}
5253
5254define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) {
5255; GENERIC-LABEL: test_pshuflw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005256; GENERIC: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005257; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5258; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [5:1.00]
5259; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5260; GENERIC-NEXT: retq # sched: [1:1.00]
5261;
5262; HASWELL-LABEL: test_pshuflw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005263; HASWELL: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005264; HASWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005265; HASWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005266; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005267; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005268;
Gadi Haber85d99b42017-10-17 13:45:39 +00005269; BROADWELL-LABEL: test_pshuflw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005270; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005271; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005272; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005273; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
Gadi Haber323f2e12017-10-24 20:19:47 +00005274; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005275;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005276; SKYLAKE-LABEL: test_pshuflw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005277; SKYLAKE: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005278; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005279; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5280; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5281; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005282;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005283; SKX-LABEL: test_pshuflw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005284; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005285; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005286; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5287; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5288; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005289;
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005290; ZNVER1-LABEL: test_pshuflw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005291; ZNVER1: # %bb.0:
Simon Pilgrim5a931c62017-09-12 11:17:01 +00005292; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50]
5293; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25]
5294; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5295; ZNVER1-NEXT: retq # sched: [1:0.50]
5296 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
5297 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
5298 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
5299 %4 = or <16 x i16> %1, %3
5300 ret <16 x i16> %4
5301}
5302
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005303define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5304; GENERIC-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005305; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005306; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5307; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5308; GENERIC-NEXT: retq # sched: [1:1.00]
5309;
5310; HASWELL-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005311; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005312; HASWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005313; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5314; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005315;
Gadi Haber85d99b42017-10-17 13:45:39 +00005316; BROADWELL-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005317; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005318; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005319; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5320; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005321;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005322; SKYLAKE-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005323; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005324; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5325; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5326; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005327;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005328; SKX-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005329; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005330; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5331; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5332; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005333;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005334; ZNVER1-LABEL: test_psignb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005335; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005336; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5337; ZNVER1-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5338; ZNVER1-NEXT: retq # sched: [1:0.50]
5339 %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
5340 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5341 %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2)
5342 ret <32 x i8> %3
5343}
5344declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
5345
5346define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5347; GENERIC-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005348; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005349; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5350; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5351; GENERIC-NEXT: retq # sched: [1:1.00]
5352;
5353; HASWELL-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005354; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005355; HASWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005356; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5357; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005358;
Gadi Haber85d99b42017-10-17 13:45:39 +00005359; BROADWELL-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005360; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005361; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005362; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5363; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005364;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005365; SKYLAKE-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005366; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005367; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5368; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5369; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005370;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005371; SKX-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005372; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005373; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5374; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5375; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005376;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005377; ZNVER1-LABEL: test_psignd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005378; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005379; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5380; ZNVER1-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5381; ZNVER1-NEXT: retq # sched: [1:0.50]
5382 %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
5383 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5384 %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2)
5385 ret <8 x i32> %3
5386}
5387declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
5388
5389define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
5390; GENERIC-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005391; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005392; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5393; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5394; GENERIC-NEXT: retq # sched: [1:1.00]
5395;
5396; HASWELL-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005397; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005398; HASWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005399; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5400; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005401;
Gadi Haber85d99b42017-10-17 13:45:39 +00005402; BROADWELL-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005403; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005404; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00005405; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5406; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005407;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005408; SKYLAKE-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005409; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005410; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5411; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5412; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005413;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005414; SKX-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005415; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005416; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5417; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5418; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005419;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005420; ZNVER1-LABEL: test_psignw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005421; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005422; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5423; ZNVER1-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5424; ZNVER1-NEXT: retq # sched: [1:0.50]
5425 %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
5426 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
5427 %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2)
5428 ret <16 x i16> %3
5429}
5430declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
5431
5432define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5433; GENERIC-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005434; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005435; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5436; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5437; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5438; GENERIC-NEXT: retq # sched: [1:1.00]
5439;
5440; HASWELL-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005441; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005442; HASWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005443; HASWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005444; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005445; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005446;
Gadi Haber85d99b42017-10-17 13:45:39 +00005447; BROADWELL-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005448; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005449; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005450; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005451; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005452; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005453;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005454; SKYLAKE-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005455; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005456; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005457; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5458; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5459; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005460;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005461; SKX-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005462; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005463; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005464; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5465; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5466; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005467;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005468; ZNVER1-LABEL: test_pslld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005469; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005470; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5471; ZNVER1-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5472; ZNVER1-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.25]
5473; ZNVER1-NEXT: retq # sched: [1:0.50]
5474 %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
5475 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5476 %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2)
5477 %4 = shl <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5478 ret <8 x i32> %4
5479}
5480declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
5481
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005482define <32 x i8> @test_pslldq(<32 x i8> %a0) {
5483; GENERIC-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005484; GENERIC: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005485; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5486; GENERIC-NEXT: retq # sched: [1:1.00]
5487;
5488; HASWELL-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005489; HASWELL: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005490; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005491; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005492;
Gadi Haber85d99b42017-10-17 13:45:39 +00005493; BROADWELL-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005494; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005495; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005496; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005497;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005498; SKYLAKE-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005499; SKYLAKE: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005500; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005501; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005502;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005503; SKX-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005504; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005505; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005506; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005507;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005508; ZNVER1-LABEL: test_pslldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005509; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00005510; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00]
5511; ZNVER1-NEXT: retq # sched: [1:0.50]
5512 %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
5513 ret <32 x i8> %1
5514}
5515
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005516define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5517; GENERIC-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005518; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005519; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5520; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5521; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5522; GENERIC-NEXT: retq # sched: [1:1.00]
5523;
5524; HASWELL-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005525; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005526; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005527; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005528; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005529; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005530;
Gadi Haber85d99b42017-10-17 13:45:39 +00005531; BROADWELL-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005532; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005533; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005534; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005535; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005536; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005537;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005538; SKYLAKE-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005539; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005540; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005541; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5542; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5543; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005544;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005545; SKX-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005546; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005547; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005548; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5549; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5550; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005551;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005552; ZNVER1-LABEL: test_psllq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005553; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005554; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5555; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5556; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25]
5557; ZNVER1-NEXT: retq # sched: [1:0.50]
5558 %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
5559 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5560 %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2)
5561 %4 = shl <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
5562 ret <4 x i64> %4
5563}
5564declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
5565
5566define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5567; GENERIC-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005568; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005569; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5570; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5571; GENERIC-NEXT: retq # sched: [1:1.00]
5572;
5573; HASWELL-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005574; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005575; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005576; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
5577; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005578;
Gadi Haber85d99b42017-10-17 13:45:39 +00005579; BROADWELL-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005580; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005581; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005582; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5583; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005584;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005585; SKYLAKE-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005586; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005587; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5588; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5589; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005590;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005591; SKX-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005592; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005593; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5594; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5595; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005596;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005597; ZNVER1-LABEL: test_psllvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005598; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005599; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5600; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5601; ZNVER1-NEXT: retq # sched: [1:0.50]
5602 %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
5603 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5604 %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2)
5605 ret <4 x i32> %3
5606}
5607declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
5608
5609define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5610; GENERIC-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005611; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005612; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5613; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5614; GENERIC-NEXT: retq # sched: [1:1.00]
5615;
5616; HASWELL-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005617; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005618; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005619; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
5620; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005621;
Gadi Haber85d99b42017-10-17 13:45:39 +00005622; BROADWELL-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005623; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005624; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005625; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5626; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005627;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005628; SKYLAKE-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005629; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005630; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5631; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5632; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005633;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005634; SKX-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005635; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005636; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5637; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5638; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005639;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005640; ZNVER1-LABEL: test_psllvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005641; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005642; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5643; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5644; ZNVER1-NEXT: retq # sched: [1:0.50]
5645 %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
5646 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5647 %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2)
5648 ret <8 x i32> %3
5649}
5650declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
5651
5652define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5653; GENERIC-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005654; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005655; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5656; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5657; GENERIC-NEXT: retq # sched: [1:1.00]
5658;
5659; HASWELL-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005660; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005661; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005662; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
5663; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005664;
Gadi Haber85d99b42017-10-17 13:45:39 +00005665; BROADWELL-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005666; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005667; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005668; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
5669; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005670;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005671; SKYLAKE-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005672; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005673; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5674; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5675; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005676;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005677; SKX-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005678; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005679; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5680; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5681; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005682;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005683; ZNVER1-LABEL: test_psllvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005684; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005685; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5686; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5687; ZNVER1-NEXT: retq # sched: [1:0.50]
5688 %1 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
5689 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5690 %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2)
5691 ret <2 x i64> %3
5692}
5693declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
5694
5695define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5696; GENERIC-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005697; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005698; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5699; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5700; GENERIC-NEXT: retq # sched: [1:1.00]
5701;
5702; HASWELL-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005703; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005704; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005705; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5706; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005707;
Gadi Haber85d99b42017-10-17 13:45:39 +00005708; BROADWELL-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005709; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005710; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005711; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5712; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005713;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005714; SKYLAKE-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005715; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005716; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5717; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5718; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005719;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005720; SKX-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005721; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005722; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5723; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5724; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005725;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005726; ZNVER1-LABEL: test_psllvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005727; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005728; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5729; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5730; ZNVER1-NEXT: retq # sched: [1:0.50]
5731 %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
5732 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5733 %3 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2)
5734 ret <4 x i64> %3
5735}
5736declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
5737
5738define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5739; GENERIC-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005740; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005741; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5742; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5743; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5744; GENERIC-NEXT: retq # sched: [1:1.00]
5745;
5746; HASWELL-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005747; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005748; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005749; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005750; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005751; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005752;
Gadi Haber85d99b42017-10-17 13:45:39 +00005753; BROADWELL-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005754; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005755; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005756; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005757; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005758; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005759;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005760; SKYLAKE-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005761; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005762; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005763; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5764; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5765; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005766;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005767; SKX-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005768; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005769; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005770; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5771; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5772; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005773;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005774; ZNVER1-LABEL: test_psllw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005775; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005776; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5777; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5778; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25]
5779; ZNVER1-NEXT: retq # sched: [1:0.50]
5780 %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
5781 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5782 %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2)
5783 %4 = shl <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
5784 ret <16 x i16> %4
5785}
5786declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
5787
5788define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5789; GENERIC-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005790; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005791; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5792; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5793; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5794; GENERIC-NEXT: retq # sched: [1:1.00]
5795;
5796; HASWELL-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005797; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005798; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005799; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005800; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005801; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005802;
Gadi Haber85d99b42017-10-17 13:45:39 +00005803; BROADWELL-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005804; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005805; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005806; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005807; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005808; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005809;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005810; SKYLAKE-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005811; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005812; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005813; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5814; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5815; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005816;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005817; SKX-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005818; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005819; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005820; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5821; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5822; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005823;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005824; ZNVER1-LABEL: test_psrad:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005825; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005826; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5827; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5828; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25]
5829; ZNVER1-NEXT: retq # sched: [1:0.50]
5830 %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
5831 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5832 %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2)
5833 %4 = ashr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5834 ret <8 x i32> %4
5835}
5836declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
5837
5838define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5839; GENERIC-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005840; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005841; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5842; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5843; GENERIC-NEXT: retq # sched: [1:1.00]
5844;
5845; HASWELL-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005846; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005847; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005848; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
5849; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005850;
Gadi Haber85d99b42017-10-17 13:45:39 +00005851; BROADWELL-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005852; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005853; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005854; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5855; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005856;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005857; SKYLAKE-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005858; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005859; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5860; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5861; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005862;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005863; SKX-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005864; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005865; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5866; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5867; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005868;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005869; ZNVER1-LABEL: test_psravd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005870; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005871; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5872; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5873; ZNVER1-NEXT: retq # sched: [1:0.50]
5874 %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
5875 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5876 %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2)
5877 ret <4 x i32> %3
5878}
5879declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
5880
5881define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5882; GENERIC-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005883; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005884; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5885; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5886; GENERIC-NEXT: retq # sched: [1:1.00]
5887;
5888; HASWELL-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005889; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005890; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005891; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
5892; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005893;
Gadi Haber85d99b42017-10-17 13:45:39 +00005894; BROADWELL-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005895; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005896; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005897; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5898; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005899;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005900; SKYLAKE-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005901; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005902; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5903; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5904; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005905;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005906; SKX-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005907; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00005908; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5909; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5910; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005911;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005912; ZNVER1-LABEL: test_psravd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005913; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005914; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5915; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5916; ZNVER1-NEXT: retq # sched: [1:0.50]
5917 %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
5918 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5919 %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2)
5920 ret <8 x i32> %3
5921}
5922declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
5923
5924define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5925; GENERIC-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005926; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005927; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5928; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5929; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5930; GENERIC-NEXT: retq # sched: [1:1.00]
5931;
5932; HASWELL-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005933; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005934; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005935; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005936; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005937; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005938;
Gadi Haber85d99b42017-10-17 13:45:39 +00005939; BROADWELL-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005940; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005941; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005942; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005943; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005944; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005945;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005946; SKYLAKE-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005947; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005948; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005949; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5950; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5951; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005952;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005953; SKX-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005954; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005955; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00005956; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5957; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5958; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00005959;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005960; ZNVER1-LABEL: test_psraw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005961; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005962; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5963; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5964; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25]
5965; ZNVER1-NEXT: retq # sched: [1:0.50]
5966 %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
5967 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5968 %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2)
5969 %4 = ashr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
5970 ret <16 x i16> %4
5971}
5972declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
5973
5974define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5975; GENERIC-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005976; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005977; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5978; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5979; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
5980; GENERIC-NEXT: retq # sched: [1:1.00]
5981;
5982; HASWELL-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005983; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005984; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005985; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005986; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00005987; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005988;
Gadi Haber85d99b42017-10-17 13:45:39 +00005989; BROADWELL-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005990; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00005991; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005992; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005993; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00005994; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00005995;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005996; SKYLAKE-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00005997; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00005998; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00005999; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6000; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6001; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006002;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006003; SKX-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006004; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006005; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006006; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6007; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6008; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006009;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006010; ZNVER1-LABEL: test_psrld:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006011; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006012; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6013; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6014; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25]
6015; ZNVER1-NEXT: retq # sched: [1:0.50]
6016 %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
6017 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6018 %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2)
6019 %4 = lshr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
6020 ret <8 x i32> %4
6021}
6022declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
6023
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006024define <32 x i8> @test_psrldq(<32 x i8> %a0) {
6025; GENERIC-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006026; GENERIC: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006027; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6028; GENERIC-NEXT: retq # sched: [1:1.00]
6029;
6030; HASWELL-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006031; HASWELL: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006032; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006033; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006034;
Gadi Haber85d99b42017-10-17 13:45:39 +00006035; BROADWELL-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006036; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006037; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006038; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006039;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006040; SKYLAKE-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006041; SKYLAKE: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006042; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006043; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006044;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006045; SKX-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006046; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006047; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006048; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006049;
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006050; ZNVER1-LABEL: test_psrldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006051; ZNVER1: # %bb.0:
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006052; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00]
6053; ZNVER1-NEXT: retq # sched: [1:0.50]
6054 %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
6055 ret <32 x i8> %1
6056}
6057
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006058define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6059; GENERIC-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006060; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006061; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
6062; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6063; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6064; GENERIC-NEXT: retq # sched: [1:1.00]
6065;
6066; HASWELL-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006067; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006068; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006069; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006070; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006071; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006072;
Gadi Haber85d99b42017-10-17 13:45:39 +00006073; BROADWELL-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006074; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006075; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006076; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006077; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006078; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006079;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006080; SKYLAKE-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006081; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006082; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006083; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6084; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6085; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006086;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006087; SKX-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006088; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006089; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006090; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6091; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6092; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006093;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006094; ZNVER1-LABEL: test_psrlq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006095; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006096; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6097; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6098; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25]
6099; ZNVER1-NEXT: retq # sched: [1:0.50]
6100 %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
6101 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6102 %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2)
6103 %4 = lshr <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
6104 ret <4 x i64> %4
6105}
6106declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
6107
6108define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
6109; GENERIC-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006110; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006111; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6112; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
6113; GENERIC-NEXT: retq # sched: [1:1.00]
6114;
6115; HASWELL-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006116; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006117; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006118; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
6119; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006120;
Gadi Haber85d99b42017-10-17 13:45:39 +00006121; BROADWELL-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006122; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006123; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006124; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
6125; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006126;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006127; SKYLAKE-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006128; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006129; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6130; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6131; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006132;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006133; SKX-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006134; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006135; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6136; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6137; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006138;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006139; ZNVER1-LABEL: test_psrlvd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006140; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006141; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6142; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6143; ZNVER1-NEXT: retq # sched: [1:0.50]
6144 %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
6145 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6146 %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2)
6147 ret <4 x i32> %3
6148}
6149declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
6150
6151define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6152; GENERIC-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006153; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006154; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6155; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6156; GENERIC-NEXT: retq # sched: [1:1.00]
6157;
6158; HASWELL-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006159; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006160; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006161; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
6162; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006163;
Gadi Haber85d99b42017-10-17 13:45:39 +00006164; BROADWELL-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006165; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006166; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006167; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
6168; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006169;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006170; SKYLAKE-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006171; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006172; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6173; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6174; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006175;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006176; SKX-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006177; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006178; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6179; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6180; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006181;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006182; ZNVER1-LABEL: test_psrlvd_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006183; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006184; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6185; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6186; ZNVER1-NEXT: retq # sched: [1:0.50]
6187 %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
6188 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6189 %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2)
6190 ret <8 x i32> %3
6191}
6192declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
6193
6194define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6195; GENERIC-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006196; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006197; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6198; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
6199; GENERIC-NEXT: retq # sched: [1:1.00]
6200;
6201; HASWELL-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006202; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006203; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006204; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
6205; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006206;
Gadi Haber85d99b42017-10-17 13:45:39 +00006207; BROADWELL-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006208; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006209; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006210; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
6211; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006212;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006213; SKYLAKE-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006214; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006215; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6216; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6217; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006218;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006219; SKX-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006220; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006221; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6222; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6223; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006224;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006225; ZNVER1-LABEL: test_psrlvq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006226; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006227; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6228; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6229; ZNVER1-NEXT: retq # sched: [1:0.50]
6230 %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
6231 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6232 %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2)
6233 ret <2 x i64> %3
6234}
6235declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
6236
; test_psrlvq_ymm: applies @llvm.x86.avx2.psrlv.q.256 twice, first with the
; register operand %a1 and then with a <4 x i64> loaded from %a2. For each CPU
; prefix the checks expect a register-form vpsrlvq, a (%rdi) memory-form
; vpsrlvq and retq, each carrying its expected 'sched' annotation.
6237define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6238; GENERIC-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006239; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006240; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6241; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6242; GENERIC-NEXT: retq # sched: [1:1.00]
6243;
6244; HASWELL-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006245; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006246; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006247; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
6248; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006249;
Gadi Haber85d99b42017-10-17 13:45:39 +00006250; BROADWELL-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006251; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006252; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006253; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6254; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006255;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006256; SKYLAKE-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006257; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006258; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6259; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6260; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006261;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006262; SKX-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006263; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006264; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6265; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6266; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006267;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006268; ZNVER1-LABEL: test_psrlvq_ymm:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006269; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006270; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6271; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6272; ZNVER1-NEXT: retq # sched: [1:0.50]
6273 %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
6274 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6275 %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2)
6276 ret <4 x i64> %3
6277}
; Intrinsic called by test_psrlvq_ymm; declared nounwind readnone.
6278declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
6279
; test_psrlw: chains three logical right shifts on a <16 x i16> value:
; @llvm.x86.avx2.psrl.w with the <8 x i16> count %a1, the same intrinsic with
; a count loaded from %a2, and an lshr by a splat constant of 2. For each CPU
; prefix the checks expect the %xmm1, (%rdi) and $2 forms of vpsrlw in that
; order, then retq, each carrying its expected 'sched' annotation.
6280define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
6281; GENERIC-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006282; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006283; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
6284; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6285; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6286; GENERIC-NEXT: retq # sched: [1:1.00]
6287;
6288; HASWELL-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006289; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006290; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006291; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006292; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006293; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006294;
Gadi Haber85d99b42017-10-17 13:45:39 +00006295; BROADWELL-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006296; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006297; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006298; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006299; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006300; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006301;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006302; SKYLAKE-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006303; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006304; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006305; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6306; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6307; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006308;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006309; SKX-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006310; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006311; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006312; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6313; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6314; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006315;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006316; ZNVER1-LABEL: test_psrlw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006317; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006318; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6319; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6320; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25]
6321; ZNVER1-NEXT: retq # sched: [1:0.50]
6322 %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
6323 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
6324 %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2)
6325 %4 = lshr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
6326 ret <16 x i16> %4
6327}
; Intrinsic called by test_psrlw; declared nounwind readnone.
6328declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
6329
; test_psubb: subtracts %a1 from %a0 as <32 x i8>, then subtracts a vector
; loaded from %a2. For each CPU prefix the checks expect a register-form
; vpsubb, a (%rdi) memory-form vpsubb and retq, each carrying its expected
; 'sched' annotation.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006330define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006331; GENERIC-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006332; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00006333; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6334; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6335; GENERIC-NEXT: retq # sched: [1:1.00]
6336;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006337; HASWELL-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006338; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006339; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006340; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6341; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006342;
Gadi Haber85d99b42017-10-17 13:45:39 +00006343; BROADWELL-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006344; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006345; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006346; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6347; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006348;
Gadi Haber767d98b2017-08-30 08:08:50 +00006349; SKYLAKE-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006350; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006351; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6352; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6353; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006354;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006355; SKX-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006356; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006357; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6358; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6359; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006360;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006361; ZNVER1-LABEL: test_psubb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006362; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00006363; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6364; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006365; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006366 %1 = sub <32 x i8> %a0, %a1
6367 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6368 %3 = sub <32 x i8> %1, %2
6369 ret <32 x i8> %3
6370}
6371
; test_psubd: subtracts %a1 from %a0 as <8 x i32>, then subtracts a vector
; loaded from %a2. For each CPU prefix the checks expect a register-form
; vpsubd, a (%rdi) memory-form vpsubd and retq, each carrying its expected
; 'sched' annotation.
6372define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006373; GENERIC-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006374; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00006375; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6376; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6377; GENERIC-NEXT: retq # sched: [1:1.00]
6378;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006379; HASWELL-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006380; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006381; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006382; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6383; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006384;
Gadi Haber85d99b42017-10-17 13:45:39 +00006385; BROADWELL-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006386; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006387; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006388; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6389; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006390;
Gadi Haber767d98b2017-08-30 08:08:50 +00006391; SKYLAKE-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006392; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006393; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6394; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6395; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006396;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006397; SKX-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006398; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006399; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6400; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6401; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006402;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006403; ZNVER1-LABEL: test_psubd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006404; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00006405; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6406; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006407; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006408 %1 = sub <8 x i32> %a0, %a1
6409 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6410 %3 = sub <8 x i32> %1, %2
6411 ret <8 x i32> %3
6412}
6413
; test_psubq: subtracts %a1 from %a0 as <4 x i64>, then subtracts a vector
; loaded from %a2. For each CPU prefix the checks expect a register-form
; vpsubq, a (%rdi) memory-form vpsubq and retq, each carrying its expected
; 'sched' annotation.
6414define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006415; GENERIC-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006416; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00006417; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6418; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6419; GENERIC-NEXT: retq # sched: [1:1.00]
6420;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006421; HASWELL-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006422; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006423; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006424; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6425; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006426;
Gadi Haber85d99b42017-10-17 13:45:39 +00006427; BROADWELL-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006428; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006429; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006430; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6431; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006432;
Gadi Haber767d98b2017-08-30 08:08:50 +00006433; SKYLAKE-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006434; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006435; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6436; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6437; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006438;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006439; SKX-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006440; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006441; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6442; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6443; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006444;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006445; ZNVER1-LABEL: test_psubq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006446; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00006447; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6448; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006449; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006450 %1 = sub <4 x i64> %a0, %a1
6451 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6452 %3 = sub <4 x i64> %1, %2
6453 ret <4 x i64> %3
6454}
6455
; test_psubsb: applies @llvm.x86.avx2.psubs.b twice on <32 x i8> values, first
; with the register operand %a1 and then with a vector loaded from %a2. For
; each CPU prefix the checks expect a register-form vpsubsb, a (%rdi)
; memory-form vpsubsb and retq, each carrying its expected 'sched' annotation.
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006456define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6457; GENERIC-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006458; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006459; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6460; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6461; GENERIC-NEXT: retq # sched: [1:1.00]
6462;
6463; HASWELL-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006464; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006465; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006466; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6467; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006468;
Gadi Haber85d99b42017-10-17 13:45:39 +00006469; BROADWELL-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006470; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006471; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006472; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6473; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006474;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006475; SKYLAKE-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006476; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006477; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6478; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6479; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006480;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006481; SKX-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006482; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006483; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6484; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6485; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006486;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006487; ZNVER1-LABEL: test_psubsb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006488; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006489; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6490; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6491; ZNVER1-NEXT: retq # sched: [1:0.50]
6492 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1)
6493 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6494 %3 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %1, <32 x i8> %2)
6495 ret <32 x i8> %3
6496}
; Intrinsic called by test_psubsb; declared nounwind readnone.
6497declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
6498
; test_psubsw: applies @llvm.x86.avx2.psubs.w twice on <16 x i16> values,
; first with the register operand %a1 and then with a vector loaded from %a2.
; For each CPU prefix the checks expect a register-form vpsubsw, a (%rdi)
; memory-form vpsubsw and retq, each carrying its expected 'sched' annotation.
6499define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6500; GENERIC-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006501; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006502; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6503; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6504; GENERIC-NEXT: retq # sched: [1:1.00]
6505;
6506; HASWELL-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006507; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006508; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006509; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6510; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006511;
Gadi Haber85d99b42017-10-17 13:45:39 +00006512; BROADWELL-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006513; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006514; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006515; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6516; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006517;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006518; SKYLAKE-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006519; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006520; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6521; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6522; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006523;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006524; SKX-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006525; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006526; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6527; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6528; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006529;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006530; ZNVER1-LABEL: test_psubsw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006531; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006532; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6533; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6534; ZNVER1-NEXT: retq # sched: [1:0.50]
6535 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1)
6536 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6537 %3 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %1, <16 x i16> %2)
6538 ret <16 x i16> %3
6539}
; Intrinsic called by test_psubsw; declared nounwind readnone.
6540declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
6541
; test_psubusb: applies @llvm.x86.avx2.psubus.b twice on <32 x i8> values,
; first with the register operand %a1 and then with a vector loaded from %a2.
; For each CPU prefix the checks expect a register-form vpsubusb, a (%rdi)
; memory-form vpsubusb and retq, each carrying its expected 'sched' annotation.
6542define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6543; GENERIC-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006544; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006545; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6546; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6547; GENERIC-NEXT: retq # sched: [1:1.00]
6548;
6549; HASWELL-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006550; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006551; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006552; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6553; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006554;
Gadi Haber85d99b42017-10-17 13:45:39 +00006555; BROADWELL-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006556; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006557; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006558; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6559; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006560;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006561; SKYLAKE-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006562; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006563; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6564; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6565; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006566;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006567; SKX-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006568; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006569; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6570; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6571; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006572;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006573; ZNVER1-LABEL: test_psubusb:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006574; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006575; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6576; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6577; ZNVER1-NEXT: retq # sched: [1:0.50]
6578 %1 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1)
6579 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6580 %3 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %1, <32 x i8> %2)
6581 ret <32 x i8> %3
6582}
; Intrinsic called by test_psubusb; declared nounwind readnone.
6583declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
6584
; test_psubusw: applies @llvm.x86.avx2.psubus.w twice on <16 x i16> values,
; first with the register operand %a1 and then with a vector loaded from %a2.
; For each CPU prefix the checks expect a register-form vpsubusw, a (%rdi)
; memory-form vpsubusw and retq, each carrying its expected 'sched' annotation.
6585define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6586; GENERIC-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006587; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006588; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6589; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6590; GENERIC-NEXT: retq # sched: [1:1.00]
6591;
6592; HASWELL-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006593; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006594; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006595; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6596; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006597;
Gadi Haber85d99b42017-10-17 13:45:39 +00006598; BROADWELL-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006599; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006600; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006601; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6602; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006603;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006604; SKYLAKE-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006605; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006606; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6607; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6608; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006609;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006610; SKX-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006611; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006612; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6613; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6614; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006615;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006616; ZNVER1-LABEL: test_psubusw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006617; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006618; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6619; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6620; ZNVER1-NEXT: retq # sched: [1:0.50]
6621 %1 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1)
6622 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6623 %3 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %1, <16 x i16> %2)
6624 ret <16 x i16> %3
6625}
; Intrinsic called by test_psubusw; declared nounwind readnone.
6626declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
6627
; test_psubw: subtracts %a1 from %a0 as <16 x i16>, then subtracts a vector
; loaded from %a2. For each CPU prefix the checks expect a register-form
; vpsubw, a (%rdi) memory-form vpsubw and retq, each carrying its expected
; 'sched' annotation.
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006628define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
Simon Pilgrim84846982017-08-01 15:14:35 +00006629; GENERIC-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006630; GENERIC: # %bb.0:
Simon Pilgrim84846982017-08-01 15:14:35 +00006631; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6632; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6633; GENERIC-NEXT: retq # sched: [1:1.00]
6634;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006635; HASWELL-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006636; HASWELL: # %bb.0:
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006637; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006638; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6639; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006640;
Gadi Haber85d99b42017-10-17 13:45:39 +00006641; BROADWELL-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006642; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006643; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006644; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6645; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006646;
Gadi Haber767d98b2017-08-30 08:08:50 +00006647; SKYLAKE-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006648; SKYLAKE: # %bb.0:
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006649; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6650; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6651; SKYLAKE-NEXT: retq # sched: [7:1.00]
Gadi Haber767d98b2017-08-30 08:08:50 +00006652;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006653; SKX-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006654; SKX: # %bb.0:
Gadi Haber684944b2017-10-08 12:52:54 +00006655; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6656; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6657; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006658;
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006659; ZNVER1-LABEL: test_psubw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006660; ZNVER1: # %bb.0:
Craig Topper106b5b62017-07-19 02:45:14 +00006661; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6662; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
Ashutosh Nemabfcac0b2017-08-31 12:38:35 +00006663; ZNVER1-NEXT: retq # sched: [1:0.50]
Simon Pilgrim946f08c2017-05-06 13:46:09 +00006664 %1 = sub <16 x i16> %a0, %a1
6665 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6666 %3 = sub <16 x i16> %1, %2
6667 ret <16 x i16> %3
6668}
6669
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006670define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6671; GENERIC-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006672; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006673; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6674; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [5:1.00]
6675; GENERIC-NEXT: retq # sched: [1:1.00]
6676;
6677; HASWELL-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006678; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006679; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006680; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6681; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006682;
Gadi Haber85d99b42017-10-17 13:45:39 +00006683; BROADWELL-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006684; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006685; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006686; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00]
6687; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006688;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006689; SKYLAKE-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006690; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006691; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006692; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6693; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006694;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006695; SKX-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006696; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006697; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006698; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6699; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006700;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006701; ZNVER1-LABEL: test_punpckhbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006702; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006703; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25]
6704; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50]
6705; ZNVER1-NEXT: retq # sched: [1:0.50]
6706 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
6707 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6708 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
6709 ret <32 x i8> %3
6710}
Simon Pilgrim76418aa2017-09-12 15:52:01 +00006711
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006712define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6713; GENERIC-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006714; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006715; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6716; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
6717; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
6718; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6719; GENERIC-NEXT: retq # sched: [1:1.00]
6720;
6721; HASWELL-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006722; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006723; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006724; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006725; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6726; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006727; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006728;
Gadi Haber85d99b42017-10-17 13:45:39 +00006729; BROADWELL-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006730; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006731; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006732; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006733; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6734; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006735; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006736;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006737; SKYLAKE-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006738; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006739; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006740; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6741; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6742; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6743; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006744;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006745; SKX-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006746; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006747; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006748; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6749; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6750; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6751; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006752;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006753; ZNVER1-LABEL: test_punpckhdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006754; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006755; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25]
6756; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
6757; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6758; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6759; ZNVER1-NEXT: retq # sched: [1:0.50]
6760 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
6761 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6762 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
6763 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
6764 ret <8 x i32> %4
6765}
6766
6767define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6768; GENERIC-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006769; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006770; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6771; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [5:1.00]
6772; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
6773; GENERIC-NEXT: retq # sched: [1:1.00]
6774;
6775; HASWELL-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006776; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006777; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006778; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006779; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006780; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006781;
Gadi Haber85d99b42017-10-17 13:45:39 +00006782; BROADWELL-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006783; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006784; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006785; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006786; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006787; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006788;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006789; SKYLAKE-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006790; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006791; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006792; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6793; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6794; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006795;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006796; SKX-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006797; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006798; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006799; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6800; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6801; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006802;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006803; ZNVER1-LABEL: test_punpckhqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006804; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006805; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25]
6806; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50]
6807; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
6808; ZNVER1-NEXT: retq # sched: [1:0.50]
6809 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6810 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6811 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6812 %4 = add <4 x i64> %1, %3
6813 ret <4 x i64> %4
6814}
6815
6816define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6817; GENERIC-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006818; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006819; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6820; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [5:1.00]
6821; GENERIC-NEXT: retq # sched: [1:1.00]
6822;
6823; HASWELL-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006824; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006825; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006826; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6827; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006828;
Gadi Haber85d99b42017-10-17 13:45:39 +00006829; BROADWELL-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006830; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006831; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006832; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00]
6833; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006834;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006835; SKYLAKE-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006836; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006837; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006838; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6839; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006840;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006841; SKX-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006842; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006843; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006844; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6845; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006846;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006847; ZNVER1-LABEL: test_punpckhwd:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006848; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006849; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25]
6850; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50]
6851; ZNVER1-NEXT: retq # sched: [1:0.50]
6852 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
6853 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6854 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
6855 ret <16 x i16> %3
6856}
6857
6858define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6859; GENERIC-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006860; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006861; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6862; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [5:1.00]
6863; GENERIC-NEXT: retq # sched: [1:1.00]
6864;
6865; HASWELL-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006866; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006867; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006868; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6869; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006870;
Gadi Haber85d99b42017-10-17 13:45:39 +00006871; BROADWELL-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006872; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006873; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006874; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00]
6875; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006876;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006877; SKYLAKE-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006878; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006879; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006880; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6881; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006882;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006883; SKX-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006884; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006885; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006886; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6887; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006888;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006889; ZNVER1-LABEL: test_punpcklbw:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006890; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006891; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25]
6892; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50]
6893; ZNVER1-NEXT: retq # sched: [1:0.50]
6894 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
6895 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6896 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
6897 ret <32 x i8> %3
6898}
6899
6900define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6901; GENERIC-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006902; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006903; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6904; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00]
6905; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
6906; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6907; GENERIC-NEXT: retq # sched: [1:1.00]
6908;
6909; HASWELL-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006910; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006911; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006912; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006913; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6914; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006915; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006916;
Gadi Haber85d99b42017-10-17 13:45:39 +00006917; BROADWELL-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006918; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006919; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006920; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006921; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6922; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006923; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006924;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006925; SKYLAKE-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006926; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006927; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006928; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6929; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6930; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6931; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006932;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006933; SKX-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006934; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006935; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006936; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6937; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6938; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6939; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006940;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006941; ZNVER1-LABEL: test_punpckldq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006942; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006943; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25]
6944; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
6945; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6946; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6947; ZNVER1-NEXT: retq # sched: [1:0.50]
6948 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
6949 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6950 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
6951 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
6952 ret <8 x i32> %4
6953}
6954
6955define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6956; GENERIC-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006957; GENERIC: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006958; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6959; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [5:1.00]
6960; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
6961; GENERIC-NEXT: retq # sched: [1:1.00]
6962;
6963; HASWELL-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006964; HASWELL: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006965; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006966; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006967; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber2cf601f2017-12-08 09:48:44 +00006968; HASWELL-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006969;
Gadi Haber85d99b42017-10-17 13:45:39 +00006970; BROADWELL-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006971; BROADWELL: # %bb.0:
Gadi Haber85d99b42017-10-17 13:45:39 +00006972; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber323f2e12017-10-24 20:19:47 +00006973; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006974; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
Gadi Haber323f2e12017-10-24 20:19:47 +00006975; BROADWELL-NEXT: retq # sched: [7:1.00]
Gadi Haber85d99b42017-10-17 13:45:39 +00006976;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006977; SKYLAKE-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006978; SKYLAKE: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006979; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber1e0f1f42017-10-17 06:47:04 +00006980; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6981; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6982; SKYLAKE-NEXT: retq # sched: [7:1.00]
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006983;
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006984; SKX-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006985; SKX: # %bb.0:
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006986; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
Gadi Haber684944b2017-10-08 12:52:54 +00006987; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6988; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6989; SKX-NEXT: retq # sched: [7:1.00]
Simon Pilgrima29dbdf2017-10-06 13:40:29 +00006990;
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006991; ZNVER1-LABEL: test_punpcklqdq:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00006992; ZNVER1: # %bb.0:
Simon Pilgrim0af5a7722017-09-12 15:01:20 +00006993; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25]
6994; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50]
6995; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
6996; ZNVER1-NEXT: retq # sched: [1:0.50]
6997 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
6998 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6999 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
7000 %4 = add <4 x i64> %1, %3
7001 ret <4 x i64> %4
7002}
7003
; Schedule test for the 256-bit vpunpcklwd shuffle. The first shufflevector
; interleaves the low four words of each 128-bit half of %a0 and %a1 (register
; form); the second applies the same mask to that result and a vector loaded
; from %a2, which the assertions below expect to be folded into a memory
; operand. Each CPU prefix pins the "sched:" annotation printed by llc.
define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_punpcklwd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; GENERIC-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpcklwd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; HASWELL-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_punpcklwd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; BROADWELL-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_punpcklwd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_punpcklwd:
; SKX:       # %bb.0:
; SKX-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; SKX-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_punpcklwd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25]
; ZNVER1-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
  ret <16 x i16> %3
}
7045
; Schedule test for the 256-bit vpxor instruction. %a0 is xor'ed with %a1
; (register form), then with a vector loaded from %a2 (memory form, expected
; below as "vpxor (%rdi)"), and finally %a1 is added to the result.
; NOTE(review): the trailing add presumably keeps the xor in the integer
; domain so that vpxor is selected - confirm against the other logic tests.
define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_pxor:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pxor:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pxor:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; BROADWELL-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pxor:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pxor:
; SKX:       # %bb.0:
; SKX-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pxor:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = xor <4 x i64> %a0, %a1
  %2 = load <4 x i64>, <4 x i64> *%a2, align 32
  %3 = xor <4 x i64> %1, %2
  %4 = add <4 x i64> %3, %a1
  ret <4 x i64> %4
}
7094
; Metadata node wrapping the constant i32 1; the two functions directly above do not reference it.
!0 = !{i32 1}