blob: 70fc7fa4a1d7ada91b186b4909e40bfabcc0f77a [file] [log] [blame]
Simon Pilgrimb099d162016-11-30 11:30:33 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64
4
5;
6; 128-bit Vectors
7;
8
; Elements 0-1 of two hadd.ps results are concatenated (mask 0,1,4,5);
; a single vhaddps is expected on both targets.
define <4 x float> @test_unpackl_fhadd_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) {
; X32-LABEL: test_unpackl_fhadd_128:
; X32:       ## %bb.0:
; X32-NEXT:    vhaddps %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_fhadd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vhaddps %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  %rhs = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a2, <4 x float> %a3)
  %res = shufflevector <4 x float> %lhs, <4 x float> %rhs, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}
24
; Element 1 of each hadd.pd result is selected (mask 1,3);
; a single vhaddpd is expected on both targets.
define <2 x double> @test_unpackh_fhadd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) {
; X32-LABEL: test_unpackh_fhadd_128:
; X32:       ## %bb.0:
; X32-NEXT:    vhaddpd %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_fhadd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vhaddpd %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  %rhs = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a2, <2 x double> %a3)
  %res = shufflevector <2 x double> %lhs, <2 x double> %rhs, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}
40
; Element 0 of each hsub.pd result is selected (mask 0,2);
; a single vhsubpd is expected on both targets.
define <2 x double> @test_unpackl_fhsub_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) {
; X32-LABEL: test_unpackl_fhsub_128:
; X32:       ## %bb.0:
; X32-NEXT:    vhsubpd %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_fhsub_128:
; X64:       ## %bb.0:
; X64-NEXT:    vhsubpd %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  %rhs = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a2, <2 x double> %a3)
  %res = shufflevector <2 x double> %lhs, <2 x double> %rhs, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %res
}
56
; Elements 2-3 of two hsub.ps results are concatenated (mask 2,3,6,7);
; a single vhsubps is expected on both targets.
define <4 x float> @test_unpackh_fhsub_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) {
; X32-LABEL: test_unpackh_fhsub_128:
; X32:       ## %bb.0:
; X32-NEXT:    vhsubps %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_fhsub_128:
; X64:       ## %bb.0:
; X64-NEXT:    vhsubps %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  %rhs = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a2, <4 x float> %a3)
  %res = shufflevector <4 x float> %lhs, <4 x float> %rhs, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x float> %res
}
72
; Elements 0-3 of two phadd.w results are concatenated (mask 0-3,8-11);
; a single vphaddw is expected on both targets.
define <8 x i16> @test_unpackl_hadd_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; X32-LABEL: test_unpackl_hadd_128:
; X32:       ## %bb.0:
; X32-NEXT:    vphaddw %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_hadd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddw %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  %rhs = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a2, <8 x i16> %a3)
  %res = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %res
}
88
; Elements 2-3 of two phadd.d results are concatenated (mask 2,3,6,7);
; a single vphaddd is expected on both targets.
define <4 x i32> @test_unpackh_hadd_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; X32-LABEL: test_unpackh_hadd_128:
; X32:       ## %bb.0:
; X32-NEXT:    vphaddd %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_hadd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddd %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
  %rhs = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a2, <4 x i32> %a3)
  %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x i32> %res
}
104
; Elements 0-1 of two phsub.d results are concatenated (mask 0,1,4,5);
; a single vphsubd is expected on both targets.
define <4 x i32> @test_unpackl_hsub_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; X32-LABEL: test_unpackl_hsub_128:
; X32:       ## %bb.0:
; X32-NEXT:    vphsubd %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_hsub_128:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubd %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
  %rhs = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a2, <4 x i32> %a3)
  %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %res
}
120
; Elements 4-7 of two phsub.w results are concatenated (mask 4-7,12-15);
; a single vphsubw is expected on both targets.
define <8 x i16> @test_unpackh_hsub_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; X32-LABEL: test_unpackh_hsub_128:
; X32:       ## %bb.0:
; X32-NEXT:    vphsubw %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_hsub_128:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubw %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
  %rhs = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a2, <8 x i16> %a3)
  %res = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}
136
; Elements 0-7 of two packsswb results are concatenated (mask 0-7,16-23);
; a single vpacksswb is expected on both targets.
define <16 x i8> @test_unpackl_packss_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; X32-LABEL: test_unpackl_packss_128:
; X32:       ## %bb.0:
; X32-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_packss_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  %rhs = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a2, <8 x i16> %a3)
  %res = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %res
}
152
; Elements 4-7 of two packssdw results are concatenated (mask 4-7,12-15);
; a single vpackssdw is expected on both targets.
define <8 x i16> @test_unpackh_packss_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; X32-LABEL: test_unpackh_packss_128:
; X32:       ## %bb.0:
; X32-NEXT:    vpackssdw %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_packss_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpackssdw %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  %rhs = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a2, <4 x i32> %a3)
  %res = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}
168
; Elements 0-3 of two packusdw results are concatenated (mask 0-3,8-11);
; a single vpackusdw is expected on both targets.
define <8 x i16> @test_unpackl_packus_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; X32-LABEL: test_unpackl_packus_128:
; X32:       ## %bb.0:
; X32-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_packus_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  %rhs = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a2, <4 x i32> %a3)
  %res = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %res
}
184
; Elements 8-15 of two packuswb results are concatenated (mask 8-15,24-31);
; a single vpackuswb is expected on both targets.
define <16 x i8> @test_unpackh_packus_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; X32-LABEL: test_unpackh_packus_128:
; X32:       ## %bb.0:
; X32-NEXT:    vpackuswb %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_packus_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpackuswb %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  %rhs = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
  %res = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}
200
201;
202; 256-bit Vectors
203;
204
; Within each group of four elements, elements 0-1 of two hadd.ps.256 results
; are concatenated (mask 0,1,8,9,4,5,12,13); a single ymm vhaddps is expected.
define <8 x float> @test_unpackl_fhadd_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> %a3) {
; X32-LABEL: test_unpackl_fhadd_256:
; X32:       ## %bb.0:
; X32-NEXT:    vhaddps %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_fhadd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vhaddps %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %lhs = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
  %rhs = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a2, <8 x float> %a3)
  %res = shufflevector <8 x float> %lhs, <8 x float> %rhs, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
  ret <8 x float> %res
}
220
; The odd elements of two hadd.pd.256 results are interleaved (mask 1,5,3,7);
; a single ymm vhaddpd is expected on both targets.
define <4 x double> @test_unpackh_fhadd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> %a3) {
; X32-LABEL: test_unpackh_fhadd_256:
; X32:       ## %bb.0:
; X32-NEXT:    vhaddpd %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_fhadd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vhaddpd %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %lhs = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
  %rhs = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a2, <4 x double> %a3)
  %res = shufflevector <4 x double> %lhs, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %res
}
236
; The even elements of two hsub.pd.256 results are interleaved (mask 0,4,2,6);
; a single ymm vhsubpd is expected on both targets.
define <4 x double> @test_unpackl_fhsub_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> %a3) {
; X32-LABEL: test_unpackl_fhsub_256:
; X32:       ## %bb.0:
; X32-NEXT:    vhsubpd %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_fhsub_256:
; X64:       ## %bb.0:
; X64-NEXT:    vhsubpd %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %lhs = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  %rhs = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a2, <4 x double> %a3)
  %res = shufflevector <4 x double> %lhs, <4 x double> %rhs, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %res
}
252
; Within each group of four elements, elements 2-3 of two hsub.ps.256 results
; are concatenated (mask 2,3,10,11,6,7,14,15); a single ymm vhsubps is expected.
define <8 x float> @test_unpackh_fhsub_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> %a3) {
; X32-LABEL: test_unpackh_fhsub_256:
; X32:       ## %bb.0:
; X32-NEXT:    vhsubps %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_fhsub_256:
; X64:       ## %bb.0:
; X64-NEXT:    vhsubps %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %lhs = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  %rhs = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a2, <8 x float> %a3)
  %res = shufflevector <8 x float> %lhs, <8 x float> %rhs, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
  ret <8 x float> %res
}
268
; Within each group of eight elements, elements 0-3 of two avx2.phadd.w results
; are concatenated (mask 0-3,16-19,8-11,24-27); a single ymm vphaddw is expected.
define <16 x i16> @test_unpackl_hadd_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; X32-LABEL: test_unpackl_hadd_256:
; X32:       ## %bb.0:
; X32-NEXT:    vphaddw %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_hadd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddw %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %lhs = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
  %rhs = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a2, <16 x i16> %a3)
  %res = shufflevector <16 x i16> %lhs, <16 x i16> %rhs, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27>
  ret <16 x i16> %res
}
284
; Within each group of four elements, elements 2-3 of two avx2.phadd.d results
; are concatenated (mask 2,3,10,11,6,7,14,15); a single ymm vphaddd is expected.
define <8 x i32> @test_unpackh_hadd_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; X32-LABEL: test_unpackh_hadd_256:
; X32:       ## %bb.0:
; X32-NEXT:    vphaddd %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_hadd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddd %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %lhs = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
  %rhs = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a2, <8 x i32> %a3)
  %res = shufflevector <8 x i32> %lhs, <8 x i32> %rhs, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
  ret <8 x i32> %res
}
300
; Within each group of four elements, elements 0-1 of two avx2.phsub.d results
; are concatenated (mask 0,1,8,9,4,5,12,13); a single ymm vphsubd is expected.
define <8 x i32> @test_unpackl_hsub_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; X32-LABEL: test_unpackl_hsub_256:
; X32:       ## %bb.0:
; X32-NEXT:    vphsubd %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_hsub_256:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubd %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %lhs = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
  %rhs = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a2, <8 x i32> %a3)
  %res = shufflevector <8 x i32> %lhs, <8 x i32> %rhs, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
  ret <8 x i32> %res
}
316
; Within each group of eight elements, elements 4-7 of two avx2.phsub.w results
; are concatenated (mask 4-7,20-23,12-15,28-31); a single ymm vphsubw is expected.
define <16 x i16> @test_unpackh_hsub_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; X32-LABEL: test_unpackh_hsub_256:
; X32:       ## %bb.0:
; X32-NEXT:    vphsubw %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_hsub_256:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubw %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %lhs = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
  %rhs = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a2, <16 x i16> %a3)
  %res = shufflevector <16 x i16> %lhs, <16 x i16> %rhs, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i16> %res
}
332
; Within each group of sixteen elements, elements 0-7 of two avx2.packsswb
; results are concatenated (mask 0-7,32-39,16-23,48-55); a single ymm
; vpacksswb is expected.
define <32 x i8> @test_unpackl_packss_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; X32-LABEL: test_unpackl_packss_256:
; X32:       ## %bb.0:
; X32-NEXT:    vpacksswb %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_packss_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpacksswb %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %lhs = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
  %rhs = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a2, <16 x i16> %a3)
  %res = shufflevector <32 x i8> %lhs, <32 x i8> %rhs, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
  ret <32 x i8> %res
}
348
; Within each group of eight elements, elements 4-7 of two avx2.packssdw
; results are concatenated (mask 4-7,20-23,12-15,28-31); a single ymm
; vpackssdw is expected.
define <16 x i16> @test_unpackh_packss_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; X32-LABEL: test_unpackh_packss_256:
; X32:       ## %bb.0:
; X32-NEXT:    vpackssdw %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_packss_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpackssdw %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %lhs = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
  %rhs = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a2, <8 x i32> %a3)
  %res = shufflevector <16 x i16> %lhs, <16 x i16> %rhs, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i16> %res
}
364
; Within each group of eight elements, elements 0-3 of two avx2.packusdw
; results are concatenated (mask 0-3,16-19,8-11,24-27); a single ymm
; vpackusdw is expected.
define <16 x i16> @test_unpackl_packus_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; X32-LABEL: test_unpackl_packus_256:
; X32:       ## %bb.0:
; X32-NEXT:    vpackusdw %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_packus_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpackusdw %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %lhs = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
  %rhs = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a2, <8 x i32> %a3)
  %res = shufflevector <16 x i16> %lhs, <16 x i16> %rhs, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27>
  ret <16 x i16> %res
}
380
; Within each group of sixteen elements, elements 8-15 of two avx2.packuswb
; results are concatenated (mask 8-15,40-47,24-31,56-63); a single ymm
; vpackuswb is expected.
;
; This test previously called @llvm.x86.avx2.packsswb, duplicating the packss
; coverage and leaving the declared @llvm.x86.avx2.packuswb intrinsic unused.
; It now exercises the unsigned-saturating pack that its name refers to,
; matching the 128-bit test_unpackh_packus_128 case.
; NOTE(review): the expected assembly below was updated by hand; re-run
; utils/update_llc_test_checks.py to confirm.
define <32 x i8> @test_unpackh_packus_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; X32-LABEL: test_unpackh_packus_256:
; X32:       ## %bb.0:
; X32-NEXT:    vpackuswb %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_packus_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpackuswb %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
  %2 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a2, <16 x i16> %a3)
  %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  ret <32 x i8> %3
}
396
; 128-bit floating-point horizontal add/sub intrinsics (sse3).
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)

; 128-bit integer horizontal add/sub intrinsics (ssse3).
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>)

; 128-bit pack intrinsics (sse2 / sse41).
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)

; 256-bit floating-point horizontal add/sub intrinsics (avx).
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>)

; 256-bit integer horizontal add/sub intrinsics (avx2).
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>)
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>)
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>)
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>)

; 256-bit pack intrinsics (avx2).
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>)
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>)
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>)