; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32-AVX512F

; Concatenates the <2 x double> elements at indices 1, 2 and 4 of %ptr into
; 128-bit lanes 0, 1 and 3 of an <8 x double>; lane 2 is undef ("12u4").
; Expected codegen: the adjacent elements 1 and 2 become one 256-bit load and
; element 4 is inserted directly from memory.
define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_12u4:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 16(%rdi), %ymm0
; ALL-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 16(%eax), %ymm0
; X32-AVX512F-NEXT:    vinsertf128 $1, 64(%eax), %ymm0, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 4
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> undef, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}
34
; Concatenates the <2 x double> elements at indices 2, 3 and 5 of %ptr into
; 128-bit lanes 0, 1 and 3 of an <8 x double>; lane 2 is zero ("23z5").
; Expected codegen: one 256-bit load for elements 2-3, a zeroed register for
; lane 2, and element 5 inserted directly from memory.
define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_23z5:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 32(%rdi), %ymm0
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 32(%eax), %ymm0
; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinsertf128 $1, 80(%eax), %ymm1, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 5
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> zeroinitializer, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}
63
; Builds an <8 x double> whose low 256 bits are zero and whose high 256 bits
; are the <4 x double> element at index 2 of %ptr ("z2").
; Expected codegen: a zeroed register plus a 256-bit insert with the load
; folded in as a memory operand.
define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_4f64_z2:
; ALL:       # BB#0:
; ALL-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
  %val1 = load <4 x double>, <4 x double>* %ptr1
  %res = shufflevector <4 x double> zeroinitializer, <4 x double> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}
82
; Inserts the doubles at indices 2, 3 and 9 of %ptr into lanes 0, 1 and 7 of
; an <8 x double>; every other lane is undef ("23uuuuu9").
; Expected codegen: a single unaligned 512-bit load starting at element 2.
define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 16(%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 16(%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
  ret <8 x double> %res7
}
105
; Inserts the doubles at indices 1 and 2 of %ptr into lanes 0 and 1 of an
; <8 x double>; lanes 2, 3, 6 and 7 are zero and lanes 4 and 5 are undef
; ("12zzuuzz").
; Expected codegen: one 128-bit load at element 1, with the remaining lanes
; filled in from zeroed registers.
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 8(%rdi), %xmm0
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 8(%eax), %xmm0
; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res2 = insertelement <8 x double> %res1, double 0.0, i32 2
  %res3 = insertelement <8 x double> %res2, double 0.0, i32 3
  %res6 = insertelement <8 x double> %res3, double 0.0, i32 6
  %res7 = insertelement <8 x double> %res6, double 0.0, i32 7
  ret <8 x double> %res7
}
137
; Inserts the doubles at indices 1, 3, 5 and 8 of %ptr into lanes 0, 2, 4 and
; 7 of an <8 x double>; lane 5 is zero and lanes 1, 3 and 6 are undef
; ("1u3u5zu8").
; Expected codegen: one 512-bit load at element 1 followed by a two-source
; permute against a zeroed register; mask entry 13 is what pulls the zero into
; lane 5. The 32-bit run prints the same mask as sixteen values.
define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 8(%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
; ALL-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 8(%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr2 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr4 = getelementptr inbounds double, double* %ptr, i64 5
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 8
  %val0 = load double, double* %ptr0
  %val2 = load double, double* %ptr2
  %val4 = load double, double* %ptr4
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res2 = insertelement <8 x double> %res0, double %val2, i32 2
  %res4 = insertelement <8 x double> %res2, double %val4, i32 4
  %res5 = insertelement <8 x double> %res4, double 0.0, i32 5
  %res7 = insertelement <8 x double> %res5, double %val7, i32 7
  ret <8 x double> %res7
}
170
; Builds an <8 x i64> whose low 256 bits are zero and whose high 256 bits are
; the <4 x i64> element at index 3 of %ptr ("z3").
; Expected codegen: a zeroed register plus a 256-bit integer insert with the
; load folded in as a memory operand.
define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_4i64_z3:
; ALL:       # BB#0:
; ALL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, 96(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vinserti64x4 $1, 96(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
  %val1 = load <4 x i64>, <4 x i64>* %ptr1
  %res = shufflevector <4 x i64> zeroinitializer, <4 x i64> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %res
}
189
; Inserts the i64 values at indices 5, 6 and 9 of %ptr into lanes 0, 1 and 4
; of an <8 x i64>; lanes 2, 3, 6 and 7 are zero and lane 5 is undef
; ("56zz9uzz").
; Expected codegen: a 128-bit load for elements 5-6, a zeroed register for
; lanes 2-3, and a 64-bit zero-extending load for element 9 that forms the
; upper 256 bits.
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu 40(%rdi), %xmm0
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu 40(%eax), %xmm0
; X32-AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 6
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 9
  %val0 = load i64, i64* %ptr0
  %val1 = load i64, i64* %ptr1
  %val4 = load i64, i64* %ptr4
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <8 x i64> %res0, i64 %val1, i32 1
  %res2 = insertelement <8 x i64> %res1, i64 0, i32 2
  %res3 = insertelement <8 x i64> %res2, i64 0, i32 3
  %res4 = insertelement <8 x i64> %res3, i64 %val4, i32 4
  %res6 = insertelement <8 x i64> %res4, i64 0, i32 6
  %res7 = insertelement <8 x i64> %res6, i64 0, i32 7
  ret <8 x i64> %res7
}
224
; Inserts the i64 values at indices 1, 3, 5 and 8 of %ptr into lanes 0, 2, 4
; and 7 of an <8 x i64>; lane 5 is zero and lanes 1, 3 and 6 are undef
; ("1u3u5zu8").
; Expected codegen: one 512-bit load at element 1 followed by a two-source
; permute against a zeroed register; mask entry 13 is what pulls the zero into
; lane 5. The 32-bit run prints the same mask as sixteen values.
define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu64 8(%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
; ALL-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr7 = getelementptr inbounds i64, i64* %ptr, i64 8
  %val0 = load i64, i64* %ptr0
  %val2 = load i64, i64* %ptr2
  %val4 = load i64, i64* %ptr4
  %val7 = load i64, i64* %ptr7
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res2 = insertelement <8 x i64> %res0, i64 %val2, i32 2
  %res4 = insertelement <8 x i64> %res2, i64 %val4, i32 4
  %res5 = insertelement <8 x i64> %res4, i64 0, i32 5
  %res7 = insertelement <8 x i64> %res5, i64 %val7, i32 7
  ret <8 x i64> %res7
}
257
; Inserts the floats at indices 8 and 9 of %ptr into lanes 0 and 1 of a
; <16 x float>; lanes 2, 3, 4 and 15 are zero and the rest are undef.
; Expected codegen: a single 64-bit scalar load whose destination is shown as
; "mem[0],zero", i.e. the load itself supplies the zeroed lanes.
define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 8
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 9
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res2 = insertelement <16 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <16 x float> %res2, float 0.0, i32 3
  %res4 = insertelement <16 x float> %res3, float 0.0, i32 4
  %resF = insertelement <16 x float> %res4, float 0.0, i32 15
  ret <16 x float> %resF
}
281
; Inserts the floats at indices 4, 5 and 7 of %ptr into lanes 0, 1 and 3 of a
; <16 x float>; every other lane is undef ("45u7" followed by undefs).
; Expected codegen: a single unaligned 128-bit load starting at element 4.
define <16 x float> @merge_16f32_f32_45u7uuuuuuuuuuuu(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups 16(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 16(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 5
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 7
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val3 = load float, float* %ptr3
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res3 = insertelement <16 x float> %res1, float %val3, i32 3
  ret <16 x float> %res3
}
304
; Inserts the floats at indices 0, 3, 12, 14 and 15 of %ptr into the lanes
; with the same numbers of a <16 x float>; every other lane is undef.
; Expected codegen: a single unaligned 512-bit load from %ptr.
define <16 x float> @merge_16f32_f32_0uu3uuuuuuuuCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %resC = insertelement <16 x float> %res3, float %valC, i32 12
  %resE = insertelement <16 x float> %resC, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}
333
; Inserts the floats at indices 0, 3, 12, 14 and 15 of %ptr into the lanes
; with the same numbers of a <16 x float>, and zeroes lanes 4, 5 and 13; the
; remaining lanes are undef.
; Expected codegen: one 512-bit load from %ptr followed by a two-source
; permute against a zeroed register; mask entries 20, 21 and 29 are what pull
; the zeros into lanes 4, 5 and 13.
; Review remark: the function name has its third 'z' at position 11, but the
; IR (and the mask) zero lane 13. The name is kept because the labels below
; depend on it.
define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %res4 = insertelement <16 x float> %res3, float 0.0, i32 4
  %res5 = insertelement <16 x float> %res4, float 0.0, i32 5
  %resC = insertelement <16 x float> %res5, float %valC, i32 12
  %resD = insertelement <16 x float> %resC, float 0.0, i32 13
  %resE = insertelement <16 x float> %resD, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}
371
; Inserts the i32 values at indices 1 and 2 of %ptr into lanes 0 and 1 of a
; <16 x i32>; lanes 2, 3, 4 and 15 are zero and the rest are undef.
; Expected codegen: a single 64-bit load whose destination is shown as
; "mem[0],zero", i.e. the load itself supplies the zeroed lanes.
define <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 2
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
  %res2 = insertelement <16 x i32> %res1, i32 0, i32 2
  %res3 = insertelement <16 x i32> %res2, i32 0, i32 3
  %res4 = insertelement <16 x i32> %res3, i32 0, i32 4
  %resF = insertelement <16 x i32> %res4, i32 0, i32 15
  ret <16 x i32> %resF
}
395
; Inserts the i32 values at indices 2, 3 and 5 of %ptr into lanes 0, 1 and 3
; of a <16 x i32>; every other lane is undef ("23u5" followed by undefs).
; Expected codegen: a single unaligned 128-bit load starting at element 2.
define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups 8(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %val3 = load i32, i32* %ptr3
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
  %res3 = insertelement <16 x i32> %res1, i32 %val3, i32 3
  ret <16 x i32> %res3
}
418
; Inserts the i32 values at indices 0, 3, 12, 14 and 15 of %ptr into the lanes
; with the same numbers of a <16 x i32>; every other lane is undef.
; Expected codegen: a single unaligned 512-bit load from %ptr.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load i32, i32* %ptr0
  %val3 = load i32, i32* %ptr3
  %valC = load i32, i32* %ptrC
  %valE = load i32, i32* %ptrE
  %valF = load i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}
447
; Inserts the i32 values at indices 0, 3, 12, 14 and 15 of %ptr into the lanes
; with the same numbers of a <16 x i32>, and zeroes lanes 4, 5 and 13; the
; remaining lanes are undef.
; Expected codegen: one 512-bit load from %ptr followed by a two-source
; permute against a zeroed register; mask entries 20, 21 and 29 are what pull
; the zeros into lanes 4, 5 and 13.
; Review remark: the function name has its third 'z' at position 11, but the
; IR (and the mask) zero lane 13. The name is kept because the labels below
; depend on it.
define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load i32, i32* %ptr0
  %val3 = load i32, i32* %ptr3
  %valC = load i32, i32* %ptrC
  %valE = load i32, i32* %ptrE
  %valF = load i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %res4 = insertelement <16 x i32> %res3, i32 0, i32 4
  %res5 = insertelement <16 x i32> %res4, i32 0, i32 5
  %resC = insertelement <16 x i32> %res5, i32 %valC, i32 12
  %resD = insertelement <16 x i32> %resC, i32 0, i32 13
  %resE = insertelement <16 x i32> %resD, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}
485
; Inserts the i16 values at indices 1, 2 and 4 of %ptr into lanes 0, 1 and 3
; of a <32 x i16>; lanes 30 and 31 are zero and the rest are undef.
; Expected codegen: a single 64-bit zero-extending load. The runs without
; avx512bw additionally expect ymm1 to be zeroed (presumably because the
; result is then returned as two 256-bit halves - confirm against the
; backend); the avx512bw run expects the load alone.
define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 1
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 2
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 4
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
  %res30 = insertelement <32 x i16> %res3, i16 0, i16 30
  %res31 = insertelement <32 x i16> %res30, i16 0, i16 31
  ret <32 x i16> %res31
}
517
; Inserts the i16 values at indices 4, 5 and 7 of %ptr into lanes 0, 1 and 3
; of a <32 x i16>; every other lane is undef.
; Expected codegen: a single 64-bit load, identical for the runs with and
; without avx512bw.
define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
  ret <32 x i16> %res3
}
540
; Inserts the i16 values at indices 2 and 3 of %ptr into lanes 0 and 1 of a
; <32 x i16>; lanes 3, 14, 15, 16 and 17 are zero and the rest are undef.
; Expected codegen: a single 32-bit zero-extending load. The runs without
; avx512bw additionally expect ymm1 to be zeroed (presumably because the
; result is then returned as two 256-bit halves - confirm against the
; backend); the avx512bw run expects the load alone.
define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 0, i16 3
  %resE = insertelement <32 x i16> %res3, i16 0, i16 14
  %resF = insertelement <32 x i16> %resE, i16 0, i16 15
  %resG = insertelement <32 x i16> %resF, i16 0, i16 16
  %resH = insertelement <32 x i16> %resG, i16 0, i16 17
  ret <32 x i16> %resH
}
572
; Inserts the i8 values at indices 1, 2, 4 and 8 of %ptr into lanes 0, 1, 3
; and 7 of a <64 x i8>; lanes 14, 15, 16, 17 and 63 are zero and the rest are
; undef.
; Expected codegen: a single 64-bit zero-extending load. The runs without
; avx512bw additionally expect ymm1 to be zeroed (presumably because the
; result is then returned as two 256-bit halves - confirm against the
; backend); the avx512bw run expects the load alone.
define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 8
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %val7 = load i8, i8* %ptr7
  %res0 = insertelement <64 x i8> undef, i8 %val0, i8 0
  %res1 = insertelement <64 x i8> %res0, i8 %val1, i8 1
  %res3 = insertelement <64 x i8> %res1, i8 %val3, i8 3
  %res7 = insertelement <64 x i8> %res3, i8 %val7, i8 7
  %res14 = insertelement <64 x i8> %res7, i8 0, i8 14
  %res15 = insertelement <64 x i8> %res14, i8 0, i8 15
  %res16 = insertelement <64 x i8> %res15, i8 0, i8 16
  %res17 = insertelement <64 x i8> %res16, i8 0, i8 17
  %res63 = insertelement <64 x i8> %res17, i8 0, i8 63
  ret <64 x i8> %res63
}
610
; Inserts the i8 values at indices 1, 2 and 4 of %ptr into lanes 0, 1 and 3 of
; a <64 x i8>; lanes 14, 15, 16, 17 and 63 are zero and the rest are undef.
; Expected codegen: a single 32-bit zero-extending load. The runs without
; avx512bw additionally expect ymm1 to be zeroed (presumably because the
; result is then returned as two 256-bit halves - confirm against the
; backend); the avx512bw run expects the load alone.
define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %res0 = insertelement <64 x i8> undef, i8 %val0, i8 0
  %res1 = insertelement <64 x i8> %res0, i8 %val1, i8 1
  %res3 = insertelement <64 x i8> %res1, i8 %val3, i8 3
  %res14 = insertelement <64 x i8> %res3, i8 0, i8 14
  %res15 = insertelement <64 x i8> %res14, i8 0, i8 15
  %res16 = insertelement <64 x i8> %res15, i8 0, i8 16
  %res17 = insertelement <64 x i8> %res16, i8 0, i8 17
  %res63 = insertelement <64 x i8> %res17, i8 0, i8 63
  ret <64 x i8> %res63
}
Simon Pilgrim01108902016-03-24 00:14:37 +0000645
;
; Consecutive loads must not be combined when any (or all) of them are volatile.
;
649
; Builds an <8 x double> from the doubles at element indices 2, 3 and 9 of %ptr
; (placed in lanes 0, 1 and 7; every other lane is undef). Only the load of
; element 2 is volatile. The assertions expect lanes 0 and 1 to be filled by
; separate vmovsd / vmovhpd instructions rather than one wider load, and
; element 9 (byte offset 72) to be fetched with vbroadcastsd into the upper
; half that vinsertf64x4 then places in the result.
define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vbroadcastsd 72(%rdi), %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X32-AVX512F-NEXT:    vbroadcastsd 72(%eax), %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
  %val0 = load volatile double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
  ret <8 x double> %res7
}
678
; Builds a <16 x i32> from the i32 elements at indices 0, 3, 12, 14 and 15 of
; %ptr, each placed in the lane with the same index; every other lane is
; undef. The name encodes the lanes as hex digits ('C', 'E', 'F' are 12, 14,
; 15) with 'u' for undef. All five loads are volatile, and the assertions
; expect each one to stay a separate vmovd / vpinsrd rather than being merged
; into a wider vector load.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; ALL:       # BB#0:
; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    vpinsrd $3, 12(%rdi), %xmm0, %xmm0
; ALL-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT:    vpinsrd $2, 56(%rdi), %xmm1, %xmm1
; ALL-NEXT:    vpinsrd $3, 60(%rdi), %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vpinsrd $3, 12(%eax), %xmm0, %xmm0
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vpinsrd $2, 56(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT:    vpinsrd $3, 60(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load volatile i32, i32* %ptr0
  %val3 = load volatile i32, i32* %ptr3
  %valC = load volatile i32, i32* %ptrC
  %valE = load volatile i32, i32* %ptrE
  %valF = load volatile i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}