; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

; Sign-compare <2 x i64> to zero, (no-op) bitcast the <2 x i1> mask, and add
; its two elements; checks the movmskpd-based lowering of the i1 mask math.
define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %ecx
; SSE2-SSSE3-NEXT:    movl %ecx, %eax
; SSE2-SSSE3-NEXT:    shrb %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v2i64_to_v2i1:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %xmm0, %ecx
; AVX12-NEXT:    movl %ecx, %eax
; AVX12-NEXT:    shrb %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v2i64_to_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; AVX512-NEXT:    kshiftrw $1, %k0, %k1
; AVX512-NEXT:    kmovd %k1, %ecx
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <2 x i64> %a0, zeroinitializer
  %2 = bitcast <2 x i1> %1 to <2 x i1>
  %3 = extractelement <2 x i1> %2, i32 0
  %4 = extractelement <2 x i1> %2, i32 1
  %5 = add i1 %3, %4
  ret i1 %5
}

; Sign-compare <4 x i32> to zero, bitcast the <4 x i1> mask to <2 x i2>, and
; add the two i2 elements; checks the movmskps mask split (low 2 / high 2 bits).
define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i32_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    andl $3, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    shrl $2, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v4i32_to_v2i2:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrl $2, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $3, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v4i32_to_v2i2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $2, %ecx
; AVX512-NEXT:    andl $3, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $3, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

; Sign-compare <8 x i16> to zero, bitcast the <8 x i1> mask to <2 x i4>, and
; add the two i4 elements; checks packsswb+pmovmskb mask split (low/high nibble).
define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i16_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    movzbl %al, %ecx
; AVX12-NEXT:    shrl $4, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $15, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %xmm0, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

; Sign-compare <16 x i8> to zero, bitcast the <16 x i1> mask to <2 x i8>, and
; add the two i8 elements; checks the pmovmskb mask byte-split per subtarget.
define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i8_to_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_v16i8_to_v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v16i8_to_v2i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    vmovd %eax, %xmm0
; AVX12-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX12-NEXT:    vpextrb $1, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i8_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %xmm0, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i8> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

;
; 256-bit vectors
;

; Sign-compare <4 x i64> to zero, bitcast the <4 x i1> mask to <2 x i2>, and
; add the two i2 elements; 256-bit input, so AVX paths use ymm movmsk.
define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrl $2, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $3, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v4i64_to_v2i2:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %ymm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrl $2, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $3, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v4i64_to_v2i2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $2, %ecx
; AVX512-NEXT:    andl $3, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $3, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <4 x i64> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

; Sign-compare <8 x i32> to zero, bitcast the <8 x i1> mask to <2 x i4>, and
; add the two i4 elements; 256-bit input lowered via pack+movmsk.
define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i32_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i32_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskps %ymm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrl $4, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $15, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i32_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

; Sign-compare <16 x i16> to zero, bitcast the <16 x i1> mask to <2 x i8>, and
; add the two i8 elements; AVX1 vs AVX2 differ in how the ymm is packed.
define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i16_to_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    packsswb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_v16i16_to_v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i16_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i16_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i16_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %ymm0, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i16> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

; Sign-compare <32 x i8> to zero, bitcast the <32 x i1> mask to <2 x i16>, and
; add the two i16 elements; AVX512 spills the 32-bit kmask through an aligned slot.
define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movd %ecx, %xmm0
; SSE2-SSSE3-NEXT:    pextrw $0, %xmm0, %ecx
; SSE2-SSSE3-NEXT:    pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrw $1, %xmm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    pushq %rbp
; AVX512-NEXT:    movq %rsp, %rbp
; AVX512-NEXT:    andq $-32, %rsp
; AVX512-NEXT:    subq $32, %rsp
; AVX512-NEXT:    vpmovb2m %ymm0, %k0
; AVX512-NEXT:    kmovd %k0, (%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    movq %rbp, %rsp
; AVX512-NEXT:    popq %rbp
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i8> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

;
; 512-bit vectors
;

; Sign-compare <8 x i64> to zero, bitcast the <8 x i1> mask to <2 x i4>, and
; add the two i4 elements; SSE2 has no 64-bit compare so the slt-0 test is
; emulated with the sign-bias (0x80000000) pcmpgtd/pcmpeqd dword sequence.
define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i64_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm7
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm7, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v8i64_to_v2i4:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $4, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v8i64_to_v2i4:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $4, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm0
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i64_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i64> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

; Sign-compare <16 x i32> to zero, bitcast the <16 x i1> mask to <2 x i8>, and
; add the two i8 elements; AVX512 spills the 16-bit kmask through the stack.
define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i32_to_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    packssdw %xmm3, %xmm2
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    packsswb %xmm2, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_v16i32_to_v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSSE3-NEXT:    packsswb %xmm2, %xmm0
; SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i32_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i32_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i32_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i32> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

; Sign-compare <32 x i16> to zero, bitcast the <32 x i1> mask to <2 x i16>, and
; add the two i16 elements; AVX512 spills the 32-bit kmask through an aligned slot.
define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    packsswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movd %ecx, %xmm0
; SSE2-SSSE3-NEXT:    pextrw $0, %xmm0, %ecx
; SSE2-SSSE3-NEXT:    pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i16_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i16_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrw $1, %xmm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i16_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    pushq %rbp
; AVX512-NEXT:    movq %rsp, %rbp
; AVX512-NEXT:    andq $-32, %rsp
; AVX512-NEXT:    subq $32, %rsp
; AVX512-NEXT:    vpmovw2m %zmm0, %k0
; AVX512-NEXT:    kmovd %k0, (%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    movq %rbp, %rsp
; AVX512-NEXT:    popq %rbp
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i16> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSE2-SSSE3-NEXT:    pxor %xmm5, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    movdqa %xmm5, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    pxor %xmm3, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    movdqa %xmm3, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    movdqa %xmm2, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    movdqa %xmm4, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $4, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    shll $5, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $6, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $7, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $8, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $9, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $10, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $11, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $12, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $13, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $14, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    shll $15, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $4, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    shll $5, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $6, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $7, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $8, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $9, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $10, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $11, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $12, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $13, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $14, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    shll $15, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $4, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    shll $5, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $6, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $7, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $8, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $9, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $10, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $11, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $12, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $13, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $14, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    shll $15, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $4, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    shll $5, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $6, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $7, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $8, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $9, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $10, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $11, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $12, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $13, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $14, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    shll $15, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-SSSE3-NEXT:    movd %xmm0, %ecx
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,0,1]
; SSE2-SSSE3-NEXT:    movd %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v64i8_to_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm3
; AVX1-NEXT:    vpextrb $1, %xmm3, %eax
; AVX1-NEXT:    andl $1, %eax
; AVX1-NEXT:    vpextrb $0, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rcx,%rax,2), %eax
; AVX1-NEXT:    vpextrb $2, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rax,%rcx,4), %eax
; AVX1-NEXT:    vpextrb $3, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rax,%rcx,8), %eax
; AVX1-NEXT:    vpextrb $4, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $4, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpextrb $5, %xmm3, %eax
; AVX1-NEXT:    andl $1, %eax
; AVX1-NEXT:    shll $5, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vpextrb $6, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $6, %ecx
; AVX1-NEXT:    vpextrb $7, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $7, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $8, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $8, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $9, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $9, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $10, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $10, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $11, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $11, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $12, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $12, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $13, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $13, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $14, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $14, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $15, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $15, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $17, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $2, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $18, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $3, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $19, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $4, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $20, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $5, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $21, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $6, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $22, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $7, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $23, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $8, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $24, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $9, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $25, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $10, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $26, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $11, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $27, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $12, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $28, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $13, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $29, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $14, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $30, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $15, %xmm1, %edx
; AVX1-NEXT:    shll $31, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm1
; AVX1-NEXT:    vpextrb $1, %xmm1, %eax
; AVX1-NEXT:    andl $1, %eax
; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rcx,%rax,2), %eax
; AVX1-NEXT:    vpextrb $2, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rax,%rcx,4), %eax
; AVX1-NEXT:    vpextrb $3, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rax,%rcx,8), %eax
; AVX1-NEXT:    vpextrb $4, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $4, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpextrb $5, %xmm1, %eax
; AVX1-NEXT:    andl $1, %eax
; AVX1-NEXT:    shll $5, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vpextrb $6, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $6, %ecx
; AVX1-NEXT:    vpextrb $7, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $7, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $8, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $8, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $9, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $9, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $10, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $10, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $11, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $11, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $12, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $12, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $13, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $13, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $14, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $14, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $15, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $15, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $17, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $2, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $18, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $3, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $19, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $4, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $20, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $5, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $21, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $6, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $22, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $7, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $23, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $8, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $24, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $9, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $25, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $10, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $26, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $11, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $27, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $12, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $28, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $13, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $29, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $14, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $30, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $15, %xmm0, %edx
; AVX1-NEXT:    shll $31, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovd %xmm0, %ecx
; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v64i8_to_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
; AVX2-NEXT:    andl $1, %eax
; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rcx,%rax,2), %eax
; AVX2-NEXT:    vpextrb $2, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rax,%rcx,4), %eax
; AVX2-NEXT:    vpextrb $3, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rax,%rcx,8), %eax
; AVX2-NEXT:    vpextrb $4, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $4, %ecx
; AVX2-NEXT:    orl %eax, %ecx
; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
; AVX2-NEXT:    andl $1, %eax
; AVX2-NEXT:    shll $5, %eax
; AVX2-NEXT:    orl %ecx, %eax
; AVX2-NEXT:    vpextrb $6, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $6, %ecx
; AVX2-NEXT:    vpextrb $7, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $7, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $8, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $9, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $9, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $10, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $10, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $11, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $11, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $12, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $12, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $13, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $13, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $14, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $14, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $15, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $15, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $16, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $17, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $2, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $18, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $3, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $19, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $4, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $20, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $5, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $21, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $6, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $22, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $7, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $23, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $24, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $9, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $25, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $10, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $26, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $11, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $27, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $12, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $28, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $13, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $29, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $14, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $30, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $15, %xmm1, %edx
; AVX2-NEXT:    shll $31, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    orl %eax, %edx
; AVX2-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    andl $1, %eax
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rcx,%rax,2), %eax
; AVX2-NEXT:    vpextrb $2, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rax,%rcx,4), %eax
; AVX2-NEXT:    vpextrb $3, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rax,%rcx,8), %eax
; AVX2-NEXT:    vpextrb $4, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $4, %ecx
; AVX2-NEXT:    orl %eax, %ecx
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    andl $1, %eax
; AVX2-NEXT:    shll $5, %eax
; AVX2-NEXT:    orl %ecx, %eax
; AVX2-NEXT:    vpextrb $6, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $6, %ecx
; AVX2-NEXT:    vpextrb $7, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $7, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $8, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $9, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $9, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $10, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $10, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $11, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $11, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $12, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $12, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $13, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $13, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $14, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $14, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $15, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $15, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $16, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $17, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $2, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $18, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $3, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $19, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $4, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $20, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $5, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $21, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $6, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $22, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $7, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $23, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $24, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $9, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $25, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $10, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $26, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $11, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $27, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $12, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $28, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $13, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $29, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $14, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $30, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $15, %xmm0, %edx
; AVX2-NEXT:    shll $31, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    orl %eax, %edx
; AVX2-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovd %xmm0, %ecx
; AVX2-NEXT:    vpextrd $1, %xmm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v64i8_to_v2i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %zmm0, %k0
; AVX512-NEXT:    kmovq %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrd $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <64 x i8> %a0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to <2 x i32>
  %3 = extractelement <2 x i32> %2, i32 0
  %4 = extractelement <2 x i32> %2, i32 1
  %5 = add i32 %3, %4
  ret i32 %5
}