; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_v2i64_to_v2i1:
; AVX12: # %bb.0:
; AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX12-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
; AVX12-NEXT: vpextrb $8, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_v2i64_to_v2i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
; AVX512-NEXT: kshiftrw $1, %k0, %k1
; AVX512-NEXT: kmovd %k1, %ecx
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
  %1 = icmp slt <2 x i64> %a0, zeroinitializer
  %2 = bitcast <2 x i1> %1 to <2 x i1>
  %3 = extractelement <2 x i1> %2, i32 0
  %4 = extractelement <2 x i1> %2, i32 1
  %5 = add i1 %3, %4
  ret i1 %5
}

define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i32_to_v2i2:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $3, %ecx
; SSE2-SSSE3-NEXT: movq %rcx, %xmm0
; SSE2-SSSE3-NEXT: shrl $2, %eax
; SSE2-SSSE3-NEXT: movq %rax, %xmm1
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_v4i32_to_v2i2:
; AVX12: # %bb.0:
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $2, %ecx
; AVX12-NEXT: vmovd %ecx, %xmm0
; AVX12-NEXT: andl $3, %eax
; AVX12-NEXT: vmovd %eax, %xmm1
; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_v4i32_to_v2i2:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $2, %ecx
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $3, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
  %1 = icmp slt <4 x i32> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i16_to_v2i4:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: movzbl %al, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: movq %rcx, %xmm0
; SSE2-SSSE3-NEXT: andl $15, %eax
; SSE2-SSSE3-NEXT: movq %rax, %xmm1
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12: # %bb.0:
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: movzbl %al, %ecx
; AVX12-NEXT: shrl $4, %ecx
; AVX12-NEXT: vmovd %ecx, %xmm0
; AVX12-NEXT: andl $15, %eax
; AVX12-NEXT: vmovd %eax, %xmm1
; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovw2m %xmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $4, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $15, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
  %1 = icmp slt <8 x i16> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i8_to_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: retq
;
; SSSE3-LABEL: bitcast_v16i8_to_v2i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_v16i8_to_v2i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: vmovd %eax, %xmm0
; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
; AVX12-NEXT: vpextrb $1, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_v16i8_to_v2i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovb2m %xmm0, %k0
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
  %1 = icmp slt <16 x i8> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

;
; 256-bit vectors
;

define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: movq %rcx, %xmm0
; SSE2-SSSE3-NEXT: andl $3, %eax
; SSE2-SSSE3-NEXT: movq %rax, %xmm1
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_v4i64_to_v2i2:
; AVX12: # %bb.0:
; AVX12-NEXT: vmovmskpd %ymm0, %eax
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $2, %ecx
; AVX12-NEXT: vmovd %ecx, %xmm0
; AVX12-NEXT: andl $3, %eax
; AVX12-NEXT: vmovd %eax, %xmm1
; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_v4i64_to_v2i2:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $2, %ecx
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $3, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = icmp slt <4 x i64> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i32_to_v2i4:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: movzbl %al, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: movq %rcx, %xmm0
; SSE2-SSSE3-NEXT: andl $15, %eax
; SSE2-SSSE3-NEXT: movq %rax, %xmm1
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_v8i32_to_v2i4:
; AVX12: # %bb.0:
; AVX12-NEXT: vmovmskps %ymm0, %eax
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $4, %ecx
; AVX12-NEXT: vmovd %ecx, %xmm0
; AVX12-NEXT: andl $15, %eax
; AVX12-NEXT: vmovd %eax, %xmm1
; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_v8i32_to_v2i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $4, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $15, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = icmp slt <8 x i32> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i16_to_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: packsswb %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: retq
;
; SSSE3-LABEL: bitcast_v16i16_to_v2i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: packsswb %xmm1, %xmm0
; SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v16i16_to_v2i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: addb %cl, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_v16i16_to_v2i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: addb %cl, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_v16i16_to_v2i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovw2m %ymm0, %k0
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = icmp slt <16 x i16> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT: shll $16, %ecx
; SSE2-SSSE3-NEXT: orl %eax, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: pextrw $0, %xmm0, %ecx
; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vpextrw $0, %xmm0, %ecx
; AVX1-NEXT: vpextrw $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %ecx
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: movq %rsp, %rbp
; AVX512-NEXT: andq $-32, %rsp
; AVX512-NEXT: subq $32, %rsp
; AVX512-NEXT: vpmovb2m %ymm0, %k0
; AVX512-NEXT: kmovd %k0, (%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
; AVX512-NEXT: vpextrw $1, %xmm0, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: movq %rbp, %rsp
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = icmp slt <32 x i8> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

;
; 512-bit vectors
;

define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i64_to_v2i4:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm3
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm5
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm3, %xmm5
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm2
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm7
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm7, %xmm2
; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm2
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm1, %xmm3
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm1
; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT: movzbl %al, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: movq %rcx, %xmm0
; SSE2-SSSE3-NEXT: andl $15, %eax
; SSE2-SSSE3-NEXT: movq %rax, %xmm1
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v8i64_to_v2i4:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $4, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: addb %cl, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_v8i64_to_v2i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $4, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: addb %cl, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_v8i64_to_v2i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $4, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $15, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = icmp slt <8 x i64> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i32_to_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm2, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: retq
;
; SSSE3-LABEL: bitcast_v16i32_to_v2i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: packssdw %xmm3, %xmm2
; SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSSE3-NEXT: packsswb %xmm2, %xmm0
; SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v16i32_to_v2i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: addb %cl, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_v16i32_to_v2i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: addb %cl, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_v16i32_to_v2i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = icmp slt <16 x i32> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: packsswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %ecx
; SSE2-SSSE3-NEXT: shll $16, %ecx
; SSE2-SSSE3-NEXT: orl %eax, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: pextrw $0, %xmm0, %ecx
; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v32i16_to_v2i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vpextrw $0, %xmm0, %ecx
; AVX1-NEXT: vpextrw $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_v32i16_to_v2i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %ecx
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_v32i16_to_v2i16:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: movq %rsp, %rbp
; AVX512-NEXT: andq $-32, %rsp
; AVX512-NEXT: subq $32, %rsp
; AVX512-NEXT: vpmovw2m %zmm0, %k0
; AVX512-NEXT: kmovd %k0, (%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
; AVX512-NEXT: vpextrw $1, %xmm0, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: movq %rbp, %rsp
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = icmp slt <32 x i16> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

; Tests codegen for bitcasting a <64 x i1> compare mask to <2 x i32>: the mask
; is produced by `icmp slt <64 x i8> %a0, zeroinitializer` (i.e. the sign bits
; of all 64 bytes), bitcast to two i32 lanes, and the two lanes are summed so
; both halves of the mask are observably used.
; Without AVX512 there is no 64-bit mask register, so SSE2/SSSE3 and AVX1/AVX2
; fall back to extracting and OR-ing the 64 sign bits one at a time through a
; stack slot; AVX512 uses vpmovb2m + kmovq directly.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see header) -- do not edit them by hand;
; rerun the script after any codegen change.
define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm4
; SSE2-SSSE3-NEXT: pxor %xmm5, %xmm5
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm5
; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm3
; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm3
; SSE2-SSSE3-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm2
; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm2
; SSE2-SSSE3-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: pcmpgtb %xmm0, %xmm4
; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: andl $1, %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $4, %ecx
; SSE2-SSSE3-NEXT: orl %eax, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: andl $1, %eax
; SSE2-SSSE3-NEXT: shll $5, %eax
; SSE2-SSSE3-NEXT: orl %ecx, %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $6, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $7, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $8, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $9, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $10, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $11, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $12, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $13, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $14, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: shll $15, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: orl %eax, %edx
; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: andl $1, %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $4, %ecx
; SSE2-SSSE3-NEXT: orl %eax, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: andl $1, %eax
; SSE2-SSSE3-NEXT: shll $5, %eax
; SSE2-SSSE3-NEXT: orl %ecx, %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $6, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $7, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $8, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $9, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $10, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $11, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $12, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $13, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $14, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: shll $15, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: orl %eax, %edx
; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: andl $1, %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $4, %ecx
; SSE2-SSSE3-NEXT: orl %eax, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: andl $1, %eax
; SSE2-SSSE3-NEXT: shll $5, %eax
; SSE2-SSSE3-NEXT: orl %ecx, %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $6, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $7, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $8, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $9, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $10, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $11, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $12, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $13, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $14, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: shll $15, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: orl %eax, %edx
; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: andl $1, %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $4, %ecx
; SSE2-SSSE3-NEXT: orl %eax, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: andl $1, %eax
; SSE2-SSSE3-NEXT: shll $5, %eax
; SSE2-SSSE3-NEXT: orl %ecx, %eax
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $6, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $7, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $8, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $9, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $10, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $11, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $12, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: andl $1, %edx
; SSE2-SSSE3-NEXT: shll $13, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: shll $14, %ecx
; SSE2-SSSE3-NEXT: orl %edx, %ecx
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT: shll $15, %edx
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: orl %eax, %edx
; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-SSSE3-NEXT: movd %xmm0, %ecx
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,0,1]
; SSE2-SSSE3-NEXT: movd %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v64i8_to_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vpextrb $1, %xmm3, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpextrb $0, %xmm3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: leal (%rcx,%rax,2), %eax
; AVX1-NEXT: vpextrb $2, %xmm3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: leal (%rax,%rcx,4), %eax
; AVX1-NEXT: vpextrb $3, %xmm3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: leal (%rax,%rcx,8), %eax
; AVX1-NEXT: vpextrb $4, %xmm3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $4, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpextrb $5, %xmm3, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: shll $5, %eax
; AVX1-NEXT: orl %ecx, %eax
; AVX1-NEXT: vpextrb $6, %xmm3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $6, %ecx
; AVX1-NEXT: vpextrb $7, %xmm3, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $7, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $8, %xmm3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $8, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $9, %xmm3, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $9, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $10, %xmm3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $10, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $11, %xmm3, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $11, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $12, %xmm3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $12, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $13, %xmm3, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $13, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $14, %xmm3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $14, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $15, %xmm3, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $15, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $1, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $17, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $2, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $18, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $3, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $19, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $4, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $20, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $5, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $21, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $6, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $22, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $7, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $23, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $8, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $24, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $9, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $25, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $10, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $26, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $11, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $27, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $12, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $28, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $13, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $29, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $14, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $30, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $15, %xmm1, %edx
; AVX1-NEXT: shll $31, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: orl %eax, %edx
; AVX1-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm1
; AVX1-NEXT: vpextrb $1, %xmm1, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: leal (%rcx,%rax,2), %eax
; AVX1-NEXT: vpextrb $2, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: leal (%rax,%rcx,4), %eax
; AVX1-NEXT: vpextrb $3, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: leal (%rax,%rcx,8), %eax
; AVX1-NEXT: vpextrb $4, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $4, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpextrb $5, %xmm1, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: shll $5, %eax
; AVX1-NEXT: orl %ecx, %eax
; AVX1-NEXT: vpextrb $6, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $6, %ecx
; AVX1-NEXT: vpextrb $7, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $7, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $8, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $8, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $9, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $9, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $10, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $10, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $11, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $11, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $12, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $12, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $13, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $13, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $14, %xmm1, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $14, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $15, %xmm1, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $15, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $1, %xmm0, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $17, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $2, %xmm0, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $18, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $3, %xmm0, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $19, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $4, %xmm0, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $20, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $5, %xmm0, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $21, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $6, %xmm0, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $22, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $7, %xmm0, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $23, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $8, %xmm0, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $24, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $9, %xmm0, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $25, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $10, %xmm0, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $26, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $11, %xmm0, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $27, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $12, %xmm0, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $28, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $13, %xmm0, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: shll $29, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: vpextrb $14, %xmm0, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: shll $30, %ecx
; AVX1-NEXT: orl %edx, %ecx
; AVX1-NEXT: vpextrb $15, %xmm0, %edx
; AVX1-NEXT: shll $31, %edx
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: orl %eax, %edx
; AVX1-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovd %xmm0, %ecx
; AVX1-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_v64i8_to_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpextrb $1, %xmm1, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: leal (%rcx,%rax,2), %eax
; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: leal (%rax,%rcx,4), %eax
; AVX2-NEXT: vpextrb $3, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: leal (%rax,%rcx,8), %eax
; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $4, %ecx
; AVX2-NEXT: orl %eax, %ecx
; AVX2-NEXT: vpextrb $5, %xmm1, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: shll $5, %eax
; AVX2-NEXT: orl %ecx, %eax
; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $6, %ecx
; AVX2-NEXT: vpextrb $7, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $7, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $8, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $9, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $9, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $10, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $11, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $11, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $12, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $13, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $13, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $14, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $15, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $15, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $16, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $1, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $17, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $18, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $3, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $19, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $20, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $5, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $21, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $22, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $7, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $23, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $24, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $9, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $25, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $26, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $11, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $27, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $28, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $13, %xmm1, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $29, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $30, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $15, %xmm1, %edx
; AVX2-NEXT: shll $31, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: orl %eax, %edx
; AVX2-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: leal (%rcx,%rax,2), %eax
; AVX2-NEXT: vpextrb $2, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: leal (%rax,%rcx,4), %eax
; AVX2-NEXT: vpextrb $3, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: leal (%rax,%rcx,8), %eax
; AVX2-NEXT: vpextrb $4, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $4, %ecx
; AVX2-NEXT: orl %eax, %ecx
; AVX2-NEXT: vpextrb $5, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: shll $5, %eax
; AVX2-NEXT: orl %ecx, %eax
; AVX2-NEXT: vpextrb $6, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $6, %ecx
; AVX2-NEXT: vpextrb $7, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $7, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $8, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $9, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $9, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $10, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $10, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $11, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $11, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $12, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $12, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $13, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $13, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $14, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $14, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $15, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $15, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $16, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $1, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $17, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $2, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $18, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $3, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $19, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $4, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $20, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $5, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $21, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $6, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $22, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $7, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $23, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $24, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $9, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $25, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $10, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $26, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $11, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $27, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $12, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $28, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $13, %xmm0, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: shll $29, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: vpextrb $14, %xmm0, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: shll $30, %ecx
; AVX2-NEXT: orl %edx, %ecx
; AVX2-NEXT: vpextrb $15, %xmm0, %edx
; AVX2-NEXT: shll $31, %edx
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: orl %eax, %edx
; AVX2-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovd %xmm0, %ecx
; AVX2-NEXT: vpextrd $1, %xmm0, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_v64i8_to_v2i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovb2m %zmm0, %k0
; AVX512-NEXT: kmovq %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512-NEXT: vmovd %xmm0, %ecx
; AVX512-NEXT: vpextrd $1, %xmm0, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = icmp slt <64 x i8> %a0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to <2 x i32>
  %3 = extractelement <2 x i32> %2, i32 0
  %4 = extractelement <2 x i32> %2, i32 1
  %5 = add i32 %3, %4
  ret i32 %5
}