; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %ecx
; SSE2-SSSE3-NEXT:    movl %ecx, %eax
; SSE2-SSSE3-NEXT:    shrb %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v2i64_to_v2i1:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %xmm0, %ecx
; AVX12-NEXT:    movl %ecx, %eax
; AVX12-NEXT:    shrb %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v2i64_to_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; AVX512-NEXT:    kshiftrw $1, %k0, %k1
; AVX512-NEXT:    kmovd %k1, %ecx
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <2 x i64> %a0, zeroinitializer
  %2 = bitcast <2 x i1> %1 to <2 x i1>
  %3 = extractelement <2 x i1> %2, i32 0
  %4 = extractelement <2 x i1> %2, i32 1
  %5 = add i1 %3, %4
  ret i1 %5
}

define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i32_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    andl $3, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    shrl $2, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v4i32_to_v2i2:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    andl $3, %ecx
; AVX12-NEXT:    vmovq %rcx, %xmm0
; AVX12-NEXT:    shrl $2, %eax
; AVX12-NEXT:    vmovq %rax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm1, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v4i32_to_v2i2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $2, %ecx
; AVX512-NEXT:    andl $3, %ecx
; AVX512-NEXT:    vmovq %rcx, %xmm0
; AVX512-NEXT:    movzwl %ax, %eax
; AVX512-NEXT:    andl $3, %eax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i16_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    movzbl %al, %ecx
; AVX12-NEXT:    shrl $4, %ecx
; AVX12-NEXT:    vmovq %rcx, %xmm0
; AVX12-NEXT:    andl $15, %eax
; AVX12-NEXT:    vmovq %rax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %xmm0, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovq %rcx, %xmm0
; AVX512-NEXT:    movzwl %ax, %eax
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i8_to_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_v16i8_to_v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v16i8_to_v2i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    vmovd %eax, %xmm0
; AVX12-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX12-NEXT:    vpextrb $1, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i8_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %xmm0, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i8> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

;
; 256-bit vectors
;

define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm4
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    por %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrl $2, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $3, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v4i64_to_v2i2:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %ymm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    andl $3, %ecx
; AVX12-NEXT:    vmovq %rcx, %xmm0
; AVX12-NEXT:    shrl $2, %eax
; AVX12-NEXT:    vmovq %rax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm1, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v4i64_to_v2i2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $2, %ecx
; AVX512-NEXT:    andl $3, %ecx
; AVX512-NEXT:    vmovq %rcx, %xmm0
; AVX512-NEXT:    movzwl %ax, %eax
; AVX512-NEXT:    andl $3, %eax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <4 x i64> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i32_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i32_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskps %ymm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    andl $15, %ecx
; AVX12-NEXT:    vmovq %rcx, %xmm0
; AVX12-NEXT:    shrl $4, %eax
; AVX12-NEXT:    vmovq %rax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm1, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i32_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovq %rcx, %xmm0
; AVX512-NEXT:    movzwl %ax, %eax
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i16_to_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    packsswb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_v16i16_to_v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i16_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i16_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i16_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %ymm0, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i16> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movd %ecx, %xmm0
; SSE2-SSSE3-NEXT:    pextrw $0, %xmm0, %ecx
; SSE2-SSSE3-NEXT:    pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrw $1, %xmm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    pushq %rbp
; AVX512-NEXT:    movq %rsp, %rbp
; AVX512-NEXT:    andq $-32, %rsp
; AVX512-NEXT:    subq $32, %rsp
; AVX512-NEXT:    vpmovb2m %ymm0, %k0
; AVX512-NEXT:    kmovd %k0, (%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    movq %rbp, %rsp
; AVX512-NEXT:    popq %rbp
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i8> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

;
; 512-bit vectors
;

define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i64_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm7
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm7, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v8i64_to_v2i4:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $4, %ecx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v8i64_to_v2i4:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $4, %ecx
; AVX2-NEXT:    vmovq %rcx, %xmm0
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vmovq %rax, %xmm1
; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i64_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovq %rcx, %xmm0
; AVX512-NEXT:    movzwl %ax, %eax
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i64> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i32_to_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    packssdw %xmm3, %xmm2
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    packsswb %xmm2, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_v16i32_to_v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSSE3-NEXT:    packsswb %xmm2, %xmm0
; SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i32_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i32_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i32_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i32> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

690define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
691; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
692; SSE2-SSSE3: # %bb.0:
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000693; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
694; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
695; SSE2-SSSE3-NEXT: packsswb %xmm3, %xmm2
696; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %ecx
697; SSE2-SSSE3-NEXT: shll $16, %ecx
698; SSE2-SSSE3-NEXT: orl %eax, %ecx
699; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
700; SSE2-SSSE3-NEXT: pextrw $0, %xmm0, %ecx
701; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
702; SSE2-SSSE3-NEXT: addl %ecx, %eax
703; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000704; SSE2-SSSE3-NEXT: retq
705;
706; AVX1-LABEL: bitcast_v32i16_to_v2i16:
707; AVX1: # %bb.0:
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000708; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
709; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
710; AVX1-NEXT: vpmovmskb %xmm0, %eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000711; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000712; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
713; AVX1-NEXT: vpmovmskb %xmm0, %ecx
714; AVX1-NEXT: shll $16, %ecx
715; AVX1-NEXT: orl %eax, %ecx
716; AVX1-NEXT: vmovd %ecx, %xmm0
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000717; AVX1-NEXT: vpextrw $0, %xmm0, %ecx
718; AVX1-NEXT: vpextrw $1, %xmm0, %eax
719; AVX1-NEXT: addl %ecx, %eax
720; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000721; AVX1-NEXT: vzeroupper
722; AVX1-NEXT: retq
723;
724; AVX2-LABEL: bitcast_v32i16_to_v2i16:
725; AVX2: # %bb.0:
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000726; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
727; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
728; AVX2-NEXT: vpmovmskb %ymm0, %eax
729; AVX2-NEXT: vmovd %eax, %xmm0
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000730; AVX2-NEXT: vpextrw $0, %xmm0, %ecx
731; AVX2-NEXT: vpextrw $1, %xmm0, %eax
732; AVX2-NEXT: addl %ecx, %eax
733; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000734; AVX2-NEXT: vzeroupper
735; AVX2-NEXT: retq
736;
737; AVX512-LABEL: bitcast_v32i16_to_v2i16:
738; AVX512: # %bb.0:
739; AVX512-NEXT: pushq %rbp
740; AVX512-NEXT: movq %rsp, %rbp
741; AVX512-NEXT: andq $-32, %rsp
742; AVX512-NEXT: subq $32, %rsp
743; AVX512-NEXT: vpmovw2m %zmm0, %k0
744; AVX512-NEXT: kmovd %k0, (%rsp)
745; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
746; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
747; AVX512-NEXT: vpextrw $1, %xmm0, %eax
748; AVX512-NEXT: addl %ecx, %eax
749; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
750; AVX512-NEXT: movq %rbp, %rsp
751; AVX512-NEXT: popq %rbp
752; AVX512-NEXT: vzeroupper
753; AVX512-NEXT: retq
754 %1 = icmp slt <32 x i16> %a0, zeroinitializer
755 %2 = bitcast <32 x i1> %1 to <2 x i16>
756 %3 = extractelement <2 x i16> %2, i32 0
757 %4 = extractelement <2 x i16> %2, i32 1
758 %5 = add i16 %3, %4
759 ret i16 %5
760}
761
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000762define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
763; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000764; SSE2-SSSE3: # %bb.0:
Craig Topperf9c30ed2019-04-25 18:19:59 +0000765; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm4
766; SSE2-SSSE3-NEXT: pxor %xmm5, %xmm5
767; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm5
768; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp)
769; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm3
770; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm3
771; SSE2-SSSE3-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp)
772; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm2
773; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm2
774; SSE2-SSSE3-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp)
775; SSE2-SSSE3-NEXT: pcmpgtb %xmm0, %xmm4
776; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp)
777; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
778; SSE2-SSSE3-NEXT: andl $1, %eax
779; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
780; SSE2-SSSE3-NEXT: andl $1, %ecx
781; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
782; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
783; SSE2-SSSE3-NEXT: andl $1, %ecx
784; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
785; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
786; SSE2-SSSE3-NEXT: andl $1, %ecx
787; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
788; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
789; SSE2-SSSE3-NEXT: andl $1, %ecx
790; SSE2-SSSE3-NEXT: shll $4, %ecx
791; SSE2-SSSE3-NEXT: orl %eax, %ecx
792; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
793; SSE2-SSSE3-NEXT: andl $1, %eax
794; SSE2-SSSE3-NEXT: shll $5, %eax
795; SSE2-SSSE3-NEXT: orl %ecx, %eax
796; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
797; SSE2-SSSE3-NEXT: andl $1, %ecx
798; SSE2-SSSE3-NEXT: shll $6, %ecx
799; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
800; SSE2-SSSE3-NEXT: andl $1, %edx
801; SSE2-SSSE3-NEXT: shll $7, %edx
802; SSE2-SSSE3-NEXT: orl %ecx, %edx
803; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
804; SSE2-SSSE3-NEXT: andl $1, %ecx
805; SSE2-SSSE3-NEXT: shll $8, %ecx
806; SSE2-SSSE3-NEXT: orl %edx, %ecx
807; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
808; SSE2-SSSE3-NEXT: andl $1, %edx
809; SSE2-SSSE3-NEXT: shll $9, %edx
810; SSE2-SSSE3-NEXT: orl %ecx, %edx
811; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
812; SSE2-SSSE3-NEXT: andl $1, %ecx
813; SSE2-SSSE3-NEXT: shll $10, %ecx
814; SSE2-SSSE3-NEXT: orl %edx, %ecx
815; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
816; SSE2-SSSE3-NEXT: andl $1, %edx
817; SSE2-SSSE3-NEXT: shll $11, %edx
818; SSE2-SSSE3-NEXT: orl %ecx, %edx
819; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
820; SSE2-SSSE3-NEXT: andl $1, %ecx
821; SSE2-SSSE3-NEXT: shll $12, %ecx
822; SSE2-SSSE3-NEXT: orl %edx, %ecx
823; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
824; SSE2-SSSE3-NEXT: andl $1, %edx
825; SSE2-SSSE3-NEXT: shll $13, %edx
826; SSE2-SSSE3-NEXT: orl %ecx, %edx
827; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
828; SSE2-SSSE3-NEXT: andl $1, %ecx
829; SSE2-SSSE3-NEXT: shll $14, %ecx
830; SSE2-SSSE3-NEXT: orl %edx, %ecx
831; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
832; SSE2-SSSE3-NEXT: shll $15, %edx
833; SSE2-SSSE3-NEXT: orl %ecx, %edx
834; SSE2-SSSE3-NEXT: orl %eax, %edx
835; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
836; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
837; SSE2-SSSE3-NEXT: andl $1, %eax
838; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
839; SSE2-SSSE3-NEXT: andl $1, %ecx
840; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
841; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
842; SSE2-SSSE3-NEXT: andl $1, %ecx
843; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
844; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
845; SSE2-SSSE3-NEXT: andl $1, %ecx
846; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
847; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
848; SSE2-SSSE3-NEXT: andl $1, %ecx
849; SSE2-SSSE3-NEXT: shll $4, %ecx
850; SSE2-SSSE3-NEXT: orl %eax, %ecx
851; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
852; SSE2-SSSE3-NEXT: andl $1, %eax
853; SSE2-SSSE3-NEXT: shll $5, %eax
854; SSE2-SSSE3-NEXT: orl %ecx, %eax
855; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
856; SSE2-SSSE3-NEXT: andl $1, %ecx
857; SSE2-SSSE3-NEXT: shll $6, %ecx
858; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
859; SSE2-SSSE3-NEXT: andl $1, %edx
860; SSE2-SSSE3-NEXT: shll $7, %edx
861; SSE2-SSSE3-NEXT: orl %ecx, %edx
862; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
863; SSE2-SSSE3-NEXT: andl $1, %ecx
864; SSE2-SSSE3-NEXT: shll $8, %ecx
865; SSE2-SSSE3-NEXT: orl %edx, %ecx
866; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
867; SSE2-SSSE3-NEXT: andl $1, %edx
868; SSE2-SSSE3-NEXT: shll $9, %edx
869; SSE2-SSSE3-NEXT: orl %ecx, %edx
870; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
871; SSE2-SSSE3-NEXT: andl $1, %ecx
872; SSE2-SSSE3-NEXT: shll $10, %ecx
873; SSE2-SSSE3-NEXT: orl %edx, %ecx
874; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
875; SSE2-SSSE3-NEXT: andl $1, %edx
876; SSE2-SSSE3-NEXT: shll $11, %edx
877; SSE2-SSSE3-NEXT: orl %ecx, %edx
878; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
879; SSE2-SSSE3-NEXT: andl $1, %ecx
880; SSE2-SSSE3-NEXT: shll $12, %ecx
881; SSE2-SSSE3-NEXT: orl %edx, %ecx
882; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
883; SSE2-SSSE3-NEXT: andl $1, %edx
884; SSE2-SSSE3-NEXT: shll $13, %edx
885; SSE2-SSSE3-NEXT: orl %ecx, %edx
886; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
887; SSE2-SSSE3-NEXT: andl $1, %ecx
888; SSE2-SSSE3-NEXT: shll $14, %ecx
889; SSE2-SSSE3-NEXT: orl %edx, %ecx
890; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
891; SSE2-SSSE3-NEXT: shll $15, %edx
892; SSE2-SSSE3-NEXT: orl %ecx, %edx
893; SSE2-SSSE3-NEXT: orl %eax, %edx
894; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
895; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
896; SSE2-SSSE3-NEXT: andl $1, %eax
897; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
898; SSE2-SSSE3-NEXT: andl $1, %ecx
899; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
900; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
901; SSE2-SSSE3-NEXT: andl $1, %ecx
902; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
903; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
904; SSE2-SSSE3-NEXT: andl $1, %ecx
905; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
906; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
907; SSE2-SSSE3-NEXT: andl $1, %ecx
908; SSE2-SSSE3-NEXT: shll $4, %ecx
909; SSE2-SSSE3-NEXT: orl %eax, %ecx
910; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
911; SSE2-SSSE3-NEXT: andl $1, %eax
912; SSE2-SSSE3-NEXT: shll $5, %eax
913; SSE2-SSSE3-NEXT: orl %ecx, %eax
914; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
915; SSE2-SSSE3-NEXT: andl $1, %ecx
916; SSE2-SSSE3-NEXT: shll $6, %ecx
917; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
918; SSE2-SSSE3-NEXT: andl $1, %edx
919; SSE2-SSSE3-NEXT: shll $7, %edx
920; SSE2-SSSE3-NEXT: orl %ecx, %edx
921; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
922; SSE2-SSSE3-NEXT: andl $1, %ecx
923; SSE2-SSSE3-NEXT: shll $8, %ecx
924; SSE2-SSSE3-NEXT: orl %edx, %ecx
925; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
926; SSE2-SSSE3-NEXT: andl $1, %edx
927; SSE2-SSSE3-NEXT: shll $9, %edx
928; SSE2-SSSE3-NEXT: orl %ecx, %edx
929; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
930; SSE2-SSSE3-NEXT: andl $1, %ecx
931; SSE2-SSSE3-NEXT: shll $10, %ecx
932; SSE2-SSSE3-NEXT: orl %edx, %ecx
933; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
934; SSE2-SSSE3-NEXT: andl $1, %edx
935; SSE2-SSSE3-NEXT: shll $11, %edx
936; SSE2-SSSE3-NEXT: orl %ecx, %edx
937; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
938; SSE2-SSSE3-NEXT: andl $1, %ecx
939; SSE2-SSSE3-NEXT: shll $12, %ecx
940; SSE2-SSSE3-NEXT: orl %edx, %ecx
941; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
942; SSE2-SSSE3-NEXT: andl $1, %edx
943; SSE2-SSSE3-NEXT: shll $13, %edx
944; SSE2-SSSE3-NEXT: orl %ecx, %edx
945; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
946; SSE2-SSSE3-NEXT: andl $1, %ecx
947; SSE2-SSSE3-NEXT: shll $14, %ecx
948; SSE2-SSSE3-NEXT: orl %edx, %ecx
949; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
950; SSE2-SSSE3-NEXT: shll $15, %edx
951; SSE2-SSSE3-NEXT: orl %ecx, %edx
952; SSE2-SSSE3-NEXT: orl %eax, %edx
953; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
954; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
955; SSE2-SSSE3-NEXT: andl $1, %eax
956; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
957; SSE2-SSSE3-NEXT: andl $1, %ecx
958; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
959; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
960; SSE2-SSSE3-NEXT: andl $1, %ecx
961; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
962; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
963; SSE2-SSSE3-NEXT: andl $1, %ecx
964; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
965; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
966; SSE2-SSSE3-NEXT: andl $1, %ecx
967; SSE2-SSSE3-NEXT: shll $4, %ecx
968; SSE2-SSSE3-NEXT: orl %eax, %ecx
969; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
970; SSE2-SSSE3-NEXT: andl $1, %eax
971; SSE2-SSSE3-NEXT: shll $5, %eax
972; SSE2-SSSE3-NEXT: orl %ecx, %eax
973; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
974; SSE2-SSSE3-NEXT: andl $1, %ecx
975; SSE2-SSSE3-NEXT: shll $6, %ecx
976; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
977; SSE2-SSSE3-NEXT: andl $1, %edx
978; SSE2-SSSE3-NEXT: shll $7, %edx
979; SSE2-SSSE3-NEXT: orl %ecx, %edx
980; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
981; SSE2-SSSE3-NEXT: andl $1, %ecx
982; SSE2-SSSE3-NEXT: shll $8, %ecx
983; SSE2-SSSE3-NEXT: orl %edx, %ecx
984; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
985; SSE2-SSSE3-NEXT: andl $1, %edx
986; SSE2-SSSE3-NEXT: shll $9, %edx
987; SSE2-SSSE3-NEXT: orl %ecx, %edx
988; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
989; SSE2-SSSE3-NEXT: andl $1, %ecx
990; SSE2-SSSE3-NEXT: shll $10, %ecx
991; SSE2-SSSE3-NEXT: orl %edx, %ecx
992; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
993; SSE2-SSSE3-NEXT: andl $1, %edx
994; SSE2-SSSE3-NEXT: shll $11, %edx
995; SSE2-SSSE3-NEXT: orl %ecx, %edx
996; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
997; SSE2-SSSE3-NEXT: andl $1, %ecx
998; SSE2-SSSE3-NEXT: shll $12, %ecx
999; SSE2-SSSE3-NEXT: orl %edx, %ecx
1000; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
1001; SSE2-SSSE3-NEXT: andl $1, %edx
1002; SSE2-SSSE3-NEXT: shll $13, %edx
1003; SSE2-SSSE3-NEXT: orl %ecx, %edx
1004; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
1005; SSE2-SSSE3-NEXT: andl $1, %ecx
1006; SSE2-SSSE3-NEXT: shll $14, %ecx
1007; SSE2-SSSE3-NEXT: orl %edx, %ecx
1008; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
1009; SSE2-SSSE3-NEXT: shll $15, %edx
1010; SSE2-SSSE3-NEXT: orl %ecx, %edx
1011; SSE2-SSSE3-NEXT: orl %eax, %edx
1012; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
1013; SSE2-SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1014; SSE2-SSSE3-NEXT: movd %xmm0, %ecx
1015; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,0,1]
1016; SSE2-SSSE3-NEXT: movd %xmm0, %eax
1017; SSE2-SSSE3-NEXT: addl %ecx, %eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +00001018; SSE2-SSSE3-NEXT: retq
1019;
Craig Topperf9c30ed2019-04-25 18:19:59 +00001020; AVX1-LABEL: bitcast_v64i8_to_v2i32:
1021; AVX1: # %bb.0:
1022; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1023; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm3
1024; AVX1-NEXT: vpextrb $1, %xmm3, %eax
1025; AVX1-NEXT: andl $1, %eax
1026; AVX1-NEXT: vpextrb $0, %xmm3, %ecx
1027; AVX1-NEXT: andl $1, %ecx
1028; AVX1-NEXT: leal (%rcx,%rax,2), %eax
1029; AVX1-NEXT: vpextrb $2, %xmm3, %ecx
1030; AVX1-NEXT: andl $1, %ecx
1031; AVX1-NEXT: leal (%rax,%rcx,4), %eax
1032; AVX1-NEXT: vpextrb $3, %xmm3, %ecx
1033; AVX1-NEXT: andl $1, %ecx
1034; AVX1-NEXT: leal (%rax,%rcx,8), %eax
1035; AVX1-NEXT: vpextrb $4, %xmm3, %ecx
1036; AVX1-NEXT: andl $1, %ecx
1037; AVX1-NEXT: shll $4, %ecx
1038; AVX1-NEXT: orl %eax, %ecx
1039; AVX1-NEXT: vpextrb $5, %xmm3, %eax
1040; AVX1-NEXT: andl $1, %eax
1041; AVX1-NEXT: shll $5, %eax
1042; AVX1-NEXT: orl %ecx, %eax
1043; AVX1-NEXT: vpextrb $6, %xmm3, %ecx
1044; AVX1-NEXT: andl $1, %ecx
1045; AVX1-NEXT: shll $6, %ecx
1046; AVX1-NEXT: vpextrb $7, %xmm3, %edx
1047; AVX1-NEXT: andl $1, %edx
1048; AVX1-NEXT: shll $7, %edx
1049; AVX1-NEXT: orl %ecx, %edx
1050; AVX1-NEXT: vpextrb $8, %xmm3, %ecx
1051; AVX1-NEXT: andl $1, %ecx
1052; AVX1-NEXT: shll $8, %ecx
1053; AVX1-NEXT: orl %edx, %ecx
1054; AVX1-NEXT: vpextrb $9, %xmm3, %edx
1055; AVX1-NEXT: andl $1, %edx
1056; AVX1-NEXT: shll $9, %edx
1057; AVX1-NEXT: orl %ecx, %edx
1058; AVX1-NEXT: vpextrb $10, %xmm3, %ecx
1059; AVX1-NEXT: andl $1, %ecx
1060; AVX1-NEXT: shll $10, %ecx
1061; AVX1-NEXT: orl %edx, %ecx
1062; AVX1-NEXT: vpextrb $11, %xmm3, %edx
1063; AVX1-NEXT: andl $1, %edx
1064; AVX1-NEXT: shll $11, %edx
1065; AVX1-NEXT: orl %ecx, %edx
1066; AVX1-NEXT: vpextrb $12, %xmm3, %ecx
1067; AVX1-NEXT: andl $1, %ecx
1068; AVX1-NEXT: shll $12, %ecx
1069; AVX1-NEXT: orl %edx, %ecx
1070; AVX1-NEXT: vpextrb $13, %xmm3, %edx
1071; AVX1-NEXT: andl $1, %edx
1072; AVX1-NEXT: shll $13, %edx
1073; AVX1-NEXT: orl %ecx, %edx
1074; AVX1-NEXT: vpextrb $14, %xmm3, %ecx
1075; AVX1-NEXT: andl $1, %ecx
1076; AVX1-NEXT: shll $14, %ecx
1077; AVX1-NEXT: orl %edx, %ecx
1078; AVX1-NEXT: vpextrb $15, %xmm3, %edx
1079; AVX1-NEXT: andl $1, %edx
1080; AVX1-NEXT: shll $15, %edx
1081; AVX1-NEXT: orl %ecx, %edx
1082; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1083; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
1084; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
1085; AVX1-NEXT: andl $1, %ecx
1086; AVX1-NEXT: shll $16, %ecx
1087; AVX1-NEXT: orl %edx, %ecx
1088; AVX1-NEXT: vpextrb $1, %xmm1, %edx
1089; AVX1-NEXT: andl $1, %edx
1090; AVX1-NEXT: shll $17, %edx
1091; AVX1-NEXT: orl %ecx, %edx
1092; AVX1-NEXT: vpextrb $2, %xmm1, %ecx
1093; AVX1-NEXT: andl $1, %ecx
1094; AVX1-NEXT: shll $18, %ecx
1095; AVX1-NEXT: orl %edx, %ecx
1096; AVX1-NEXT: vpextrb $3, %xmm1, %edx
1097; AVX1-NEXT: andl $1, %edx
1098; AVX1-NEXT: shll $19, %edx
1099; AVX1-NEXT: orl %ecx, %edx
1100; AVX1-NEXT: vpextrb $4, %xmm1, %ecx
1101; AVX1-NEXT: andl $1, %ecx
1102; AVX1-NEXT: shll $20, %ecx
1103; AVX1-NEXT: orl %edx, %ecx
1104; AVX1-NEXT: vpextrb $5, %xmm1, %edx
1105; AVX1-NEXT: andl $1, %edx
1106; AVX1-NEXT: shll $21, %edx
1107; AVX1-NEXT: orl %ecx, %edx
1108; AVX1-NEXT: vpextrb $6, %xmm1, %ecx
1109; AVX1-NEXT: andl $1, %ecx
1110; AVX1-NEXT: shll $22, %ecx
1111; AVX1-NEXT: orl %edx, %ecx
1112; AVX1-NEXT: vpextrb $7, %xmm1, %edx
1113; AVX1-NEXT: andl $1, %edx
1114; AVX1-NEXT: shll $23, %edx
1115; AVX1-NEXT: orl %ecx, %edx
1116; AVX1-NEXT: vpextrb $8, %xmm1, %ecx
1117; AVX1-NEXT: andl $1, %ecx
1118; AVX1-NEXT: shll $24, %ecx
1119; AVX1-NEXT: orl %edx, %ecx
1120; AVX1-NEXT: vpextrb $9, %xmm1, %edx
1121; AVX1-NEXT: andl $1, %edx
1122; AVX1-NEXT: shll $25, %edx
1123; AVX1-NEXT: orl %ecx, %edx
1124; AVX1-NEXT: vpextrb $10, %xmm1, %ecx
1125; AVX1-NEXT: andl $1, %ecx
1126; AVX1-NEXT: shll $26, %ecx
1127; AVX1-NEXT: orl %edx, %ecx
1128; AVX1-NEXT: vpextrb $11, %xmm1, %edx
1129; AVX1-NEXT: andl $1, %edx
1130; AVX1-NEXT: shll $27, %edx
1131; AVX1-NEXT: orl %ecx, %edx
1132; AVX1-NEXT: vpextrb $12, %xmm1, %ecx
1133; AVX1-NEXT: andl $1, %ecx
1134; AVX1-NEXT: shll $28, %ecx
1135; AVX1-NEXT: orl %edx, %ecx
1136; AVX1-NEXT: vpextrb $13, %xmm1, %edx
1137; AVX1-NEXT: andl $1, %edx
1138; AVX1-NEXT: shll $29, %edx
1139; AVX1-NEXT: orl %ecx, %edx
1140; AVX1-NEXT: vpextrb $14, %xmm1, %ecx
1141; AVX1-NEXT: andl $1, %ecx
1142; AVX1-NEXT: shll $30, %ecx
1143; AVX1-NEXT: orl %edx, %ecx
1144; AVX1-NEXT: vpextrb $15, %xmm1, %edx
1145; AVX1-NEXT: shll $31, %edx
1146; AVX1-NEXT: orl %ecx, %edx
1147; AVX1-NEXT: orl %eax, %edx
1148; AVX1-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
1149; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm1
1150; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1151; AVX1-NEXT: andl $1, %eax
1152; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
1153; AVX1-NEXT: andl $1, %ecx
1154; AVX1-NEXT: leal (%rcx,%rax,2), %eax
1155; AVX1-NEXT: vpextrb $2, %xmm1, %ecx
1156; AVX1-NEXT: andl $1, %ecx
1157; AVX1-NEXT: leal (%rax,%rcx,4), %eax
1158; AVX1-NEXT: vpextrb $3, %xmm1, %ecx
1159; AVX1-NEXT: andl $1, %ecx
1160; AVX1-NEXT: leal (%rax,%rcx,8), %eax
1161; AVX1-NEXT: vpextrb $4, %xmm1, %ecx
1162; AVX1-NEXT: andl $1, %ecx
1163; AVX1-NEXT: shll $4, %ecx
1164; AVX1-NEXT: orl %eax, %ecx
1165; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1166; AVX1-NEXT: andl $1, %eax
1167; AVX1-NEXT: shll $5, %eax
1168; AVX1-NEXT: orl %ecx, %eax
1169; AVX1-NEXT: vpextrb $6, %xmm1, %ecx
1170; AVX1-NEXT: andl $1, %ecx
1171; AVX1-NEXT: shll $6, %ecx
1172; AVX1-NEXT: vpextrb $7, %xmm1, %edx
1173; AVX1-NEXT: andl $1, %edx
1174; AVX1-NEXT: shll $7, %edx
1175; AVX1-NEXT: orl %ecx, %edx
1176; AVX1-NEXT: vpextrb $8, %xmm1, %ecx
1177; AVX1-NEXT: andl $1, %ecx
1178; AVX1-NEXT: shll $8, %ecx
1179; AVX1-NEXT: orl %edx, %ecx
1180; AVX1-NEXT: vpextrb $9, %xmm1, %edx
1181; AVX1-NEXT: andl $1, %edx
1182; AVX1-NEXT: shll $9, %edx
1183; AVX1-NEXT: orl %ecx, %edx
1184; AVX1-NEXT: vpextrb $10, %xmm1, %ecx
1185; AVX1-NEXT: andl $1, %ecx
1186; AVX1-NEXT: shll $10, %ecx
1187; AVX1-NEXT: orl %edx, %ecx
1188; AVX1-NEXT: vpextrb $11, %xmm1, %edx
1189; AVX1-NEXT: andl $1, %edx
1190; AVX1-NEXT: shll $11, %edx
1191; AVX1-NEXT: orl %ecx, %edx
1192; AVX1-NEXT: vpextrb $12, %xmm1, %ecx
1193; AVX1-NEXT: andl $1, %ecx
1194; AVX1-NEXT: shll $12, %ecx
1195; AVX1-NEXT: orl %edx, %ecx
1196; AVX1-NEXT: vpextrb $13, %xmm1, %edx
1197; AVX1-NEXT: andl $1, %edx
1198; AVX1-NEXT: shll $13, %edx
1199; AVX1-NEXT: orl %ecx, %edx
1200; AVX1-NEXT: vpextrb $14, %xmm1, %ecx
1201; AVX1-NEXT: andl $1, %ecx
1202; AVX1-NEXT: shll $14, %ecx
1203; AVX1-NEXT: orl %edx, %ecx
1204; AVX1-NEXT: vpextrb $15, %xmm1, %edx
1205; AVX1-NEXT: andl $1, %edx
1206; AVX1-NEXT: shll $15, %edx
1207; AVX1-NEXT: orl %ecx, %edx
1208; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1209; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
1210; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
1211; AVX1-NEXT: andl $1, %ecx
1212; AVX1-NEXT: shll $16, %ecx
1213; AVX1-NEXT: orl %edx, %ecx
1214; AVX1-NEXT: vpextrb $1, %xmm0, %edx
1215; AVX1-NEXT: andl $1, %edx
1216; AVX1-NEXT: shll $17, %edx
1217; AVX1-NEXT: orl %ecx, %edx
1218; AVX1-NEXT: vpextrb $2, %xmm0, %ecx
1219; AVX1-NEXT: andl $1, %ecx
1220; AVX1-NEXT: shll $18, %ecx
1221; AVX1-NEXT: orl %edx, %ecx
1222; AVX1-NEXT: vpextrb $3, %xmm0, %edx
1223; AVX1-NEXT: andl $1, %edx
1224; AVX1-NEXT: shll $19, %edx
1225; AVX1-NEXT: orl %ecx, %edx
1226; AVX1-NEXT: vpextrb $4, %xmm0, %ecx
1227; AVX1-NEXT: andl $1, %ecx
1228; AVX1-NEXT: shll $20, %ecx
1229; AVX1-NEXT: orl %edx, %ecx
1230; AVX1-NEXT: vpextrb $5, %xmm0, %edx
1231; AVX1-NEXT: andl $1, %edx
1232; AVX1-NEXT: shll $21, %edx
1233; AVX1-NEXT: orl %ecx, %edx
1234; AVX1-NEXT: vpextrb $6, %xmm0, %ecx
1235; AVX1-NEXT: andl $1, %ecx
1236; AVX1-NEXT: shll $22, %ecx
1237; AVX1-NEXT: orl %edx, %ecx
1238; AVX1-NEXT: vpextrb $7, %xmm0, %edx
1239; AVX1-NEXT: andl $1, %edx
1240; AVX1-NEXT: shll $23, %edx
1241; AVX1-NEXT: orl %ecx, %edx
1242; AVX1-NEXT: vpextrb $8, %xmm0, %ecx
1243; AVX1-NEXT: andl $1, %ecx
1244; AVX1-NEXT: shll $24, %ecx
1245; AVX1-NEXT: orl %edx, %ecx
1246; AVX1-NEXT: vpextrb $9, %xmm0, %edx
1247; AVX1-NEXT: andl $1, %edx
1248; AVX1-NEXT: shll $25, %edx
1249; AVX1-NEXT: orl %ecx, %edx
1250; AVX1-NEXT: vpextrb $10, %xmm0, %ecx
1251; AVX1-NEXT: andl $1, %ecx
1252; AVX1-NEXT: shll $26, %ecx
1253; AVX1-NEXT: orl %edx, %ecx
1254; AVX1-NEXT: vpextrb $11, %xmm0, %edx
1255; AVX1-NEXT: andl $1, %edx
1256; AVX1-NEXT: shll $27, %edx
1257; AVX1-NEXT: orl %ecx, %edx
1258; AVX1-NEXT: vpextrb $12, %xmm0, %ecx
1259; AVX1-NEXT: andl $1, %ecx
1260; AVX1-NEXT: shll $28, %ecx
1261; AVX1-NEXT: orl %edx, %ecx
1262; AVX1-NEXT: vpextrb $13, %xmm0, %edx
1263; AVX1-NEXT: andl $1, %edx
1264; AVX1-NEXT: shll $29, %edx
1265; AVX1-NEXT: orl %ecx, %edx
1266; AVX1-NEXT: vpextrb $14, %xmm0, %ecx
1267; AVX1-NEXT: andl $1, %ecx
1268; AVX1-NEXT: shll $30, %ecx
1269; AVX1-NEXT: orl %edx, %ecx
1270; AVX1-NEXT: vpextrb $15, %xmm0, %edx
1271; AVX1-NEXT: shll $31, %edx
1272; AVX1-NEXT: orl %ecx, %edx
1273; AVX1-NEXT: orl %eax, %edx
1274; AVX1-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
1275; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1276; AVX1-NEXT: vmovd %xmm0, %ecx
1277; AVX1-NEXT: vpextrd $1, %xmm0, %eax
1278; AVX1-NEXT: addl %ecx, %eax
1279; AVX1-NEXT: vzeroupper
1280; AVX1-NEXT: retq
1281;
1282; AVX2-LABEL: bitcast_v64i8_to_v2i32:
1283; AVX2: # %bb.0:
1284; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1285; AVX2-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm1
1286; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1287; AVX2-NEXT: andl $1, %eax
1288; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
1289; AVX2-NEXT: andl $1, %ecx
1290; AVX2-NEXT: leal (%rcx,%rax,2), %eax
1291; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
1292; AVX2-NEXT: andl $1, %ecx
1293; AVX2-NEXT: leal (%rax,%rcx,4), %eax
1294; AVX2-NEXT: vpextrb $3, %xmm1, %ecx
1295; AVX2-NEXT: andl $1, %ecx
1296; AVX2-NEXT: leal (%rax,%rcx,8), %eax
1297; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
1298; AVX2-NEXT: andl $1, %ecx
1299; AVX2-NEXT: shll $4, %ecx
1300; AVX2-NEXT: orl %eax, %ecx
1301; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1302; AVX2-NEXT: andl $1, %eax
1303; AVX2-NEXT: shll $5, %eax
1304; AVX2-NEXT: orl %ecx, %eax
1305; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
1306; AVX2-NEXT: andl $1, %ecx
1307; AVX2-NEXT: shll $6, %ecx
1308; AVX2-NEXT: vpextrb $7, %xmm1, %edx
1309; AVX2-NEXT: andl $1, %edx
1310; AVX2-NEXT: shll $7, %edx
1311; AVX2-NEXT: orl %ecx, %edx
1312; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
1313; AVX2-NEXT: andl $1, %ecx
1314; AVX2-NEXT: shll $8, %ecx
1315; AVX2-NEXT: orl %edx, %ecx
1316; AVX2-NEXT: vpextrb $9, %xmm1, %edx
1317; AVX2-NEXT: andl $1, %edx
1318; AVX2-NEXT: shll $9, %edx
1319; AVX2-NEXT: orl %ecx, %edx
1320; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
1321; AVX2-NEXT: andl $1, %ecx
1322; AVX2-NEXT: shll $10, %ecx
1323; AVX2-NEXT: orl %edx, %ecx
1324; AVX2-NEXT: vpextrb $11, %xmm1, %edx
1325; AVX2-NEXT: andl $1, %edx
1326; AVX2-NEXT: shll $11, %edx
1327; AVX2-NEXT: orl %ecx, %edx
1328; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
1329; AVX2-NEXT: andl $1, %ecx
1330; AVX2-NEXT: shll $12, %ecx
1331; AVX2-NEXT: orl %edx, %ecx
1332; AVX2-NEXT: vpextrb $13, %xmm1, %edx
1333; AVX2-NEXT: andl $1, %edx
1334; AVX2-NEXT: shll $13, %edx
1335; AVX2-NEXT: orl %ecx, %edx
1336; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
1337; AVX2-NEXT: andl $1, %ecx
1338; AVX2-NEXT: shll $14, %ecx
1339; AVX2-NEXT: orl %edx, %ecx
1340; AVX2-NEXT: vpextrb $15, %xmm1, %edx
1341; AVX2-NEXT: andl $1, %edx
1342; AVX2-NEXT: shll $15, %edx
1343; AVX2-NEXT: orl %ecx, %edx
1344; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
1345; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
1346; AVX2-NEXT: andl $1, %ecx
1347; AVX2-NEXT: shll $16, %ecx
1348; AVX2-NEXT: orl %edx, %ecx
1349; AVX2-NEXT: vpextrb $1, %xmm1, %edx
1350; AVX2-NEXT: andl $1, %edx
1351; AVX2-NEXT: shll $17, %edx
1352; AVX2-NEXT: orl %ecx, %edx
1353; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
1354; AVX2-NEXT: andl $1, %ecx
1355; AVX2-NEXT: shll $18, %ecx
1356; AVX2-NEXT: orl %edx, %ecx
1357; AVX2-NEXT: vpextrb $3, %xmm1, %edx
1358; AVX2-NEXT: andl $1, %edx
1359; AVX2-NEXT: shll $19, %edx
1360; AVX2-NEXT: orl %ecx, %edx
1361; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
1362; AVX2-NEXT: andl $1, %ecx
1363; AVX2-NEXT: shll $20, %ecx
1364; AVX2-NEXT: orl %edx, %ecx
1365; AVX2-NEXT: vpextrb $5, %xmm1, %edx
1366; AVX2-NEXT: andl $1, %edx
1367; AVX2-NEXT: shll $21, %edx
1368; AVX2-NEXT: orl %ecx, %edx
1369; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
1370; AVX2-NEXT: andl $1, %ecx
1371; AVX2-NEXT: shll $22, %ecx
1372; AVX2-NEXT: orl %edx, %ecx
1373; AVX2-NEXT: vpextrb $7, %xmm1, %edx
1374; AVX2-NEXT: andl $1, %edx
1375; AVX2-NEXT: shll $23, %edx
1376; AVX2-NEXT: orl %ecx, %edx
1377; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
1378; AVX2-NEXT: andl $1, %ecx
1379; AVX2-NEXT: shll $24, %ecx
1380; AVX2-NEXT: orl %edx, %ecx
1381; AVX2-NEXT: vpextrb $9, %xmm1, %edx
1382; AVX2-NEXT: andl $1, %edx
1383; AVX2-NEXT: shll $25, %edx
1384; AVX2-NEXT: orl %ecx, %edx
1385; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
1386; AVX2-NEXT: andl $1, %ecx
1387; AVX2-NEXT: shll $26, %ecx
1388; AVX2-NEXT: orl %edx, %ecx
1389; AVX2-NEXT: vpextrb $11, %xmm1, %edx
1390; AVX2-NEXT: andl $1, %edx
1391; AVX2-NEXT: shll $27, %edx
1392; AVX2-NEXT: orl %ecx, %edx
1393; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
1394; AVX2-NEXT: andl $1, %ecx
1395; AVX2-NEXT: shll $28, %ecx
1396; AVX2-NEXT: orl %edx, %ecx
1397; AVX2-NEXT: vpextrb $13, %xmm1, %edx
1398; AVX2-NEXT: andl $1, %edx
1399; AVX2-NEXT: shll $29, %edx
1400; AVX2-NEXT: orl %ecx, %edx
1401; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
1402; AVX2-NEXT: andl $1, %ecx
1403; AVX2-NEXT: shll $30, %ecx
1404; AVX2-NEXT: orl %edx, %ecx
1405; AVX2-NEXT: vpextrb $15, %xmm1, %edx
1406; AVX2-NEXT: shll $31, %edx
1407; AVX2-NEXT: orl %ecx, %edx
1408; AVX2-NEXT: orl %eax, %edx
1409; AVX2-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
1410; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
1411; AVX2-NEXT: vpextrb $1, %xmm0, %eax
1412; AVX2-NEXT: andl $1, %eax
1413; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
1414; AVX2-NEXT: andl $1, %ecx
1415; AVX2-NEXT: leal (%rcx,%rax,2), %eax
1416; AVX2-NEXT: vpextrb $2, %xmm0, %ecx
1417; AVX2-NEXT: andl $1, %ecx
1418; AVX2-NEXT: leal (%rax,%rcx,4), %eax
1419; AVX2-NEXT: vpextrb $3, %xmm0, %ecx
1420; AVX2-NEXT: andl $1, %ecx
1421; AVX2-NEXT: leal (%rax,%rcx,8), %eax
1422; AVX2-NEXT: vpextrb $4, %xmm0, %ecx
1423; AVX2-NEXT: andl $1, %ecx
1424; AVX2-NEXT: shll $4, %ecx
1425; AVX2-NEXT: orl %eax, %ecx
1426; AVX2-NEXT: vpextrb $5, %xmm0, %eax
1427; AVX2-NEXT: andl $1, %eax
1428; AVX2-NEXT: shll $5, %eax
1429; AVX2-NEXT: orl %ecx, %eax
1430; AVX2-NEXT: vpextrb $6, %xmm0, %ecx
1431; AVX2-NEXT: andl $1, %ecx
1432; AVX2-NEXT: shll $6, %ecx
1433; AVX2-NEXT: vpextrb $7, %xmm0, %edx
1434; AVX2-NEXT: andl $1, %edx
1435; AVX2-NEXT: shll $7, %edx
1436; AVX2-NEXT: orl %ecx, %edx
1437; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
1438; AVX2-NEXT: andl $1, %ecx
1439; AVX2-NEXT: shll $8, %ecx
1440; AVX2-NEXT: orl %edx, %ecx
1441; AVX2-NEXT: vpextrb $9, %xmm0, %edx
1442; AVX2-NEXT: andl $1, %edx
1443; AVX2-NEXT: shll $9, %edx
1444; AVX2-NEXT: orl %ecx, %edx
1445; AVX2-NEXT: vpextrb $10, %xmm0, %ecx
1446; AVX2-NEXT: andl $1, %ecx
1447; AVX2-NEXT: shll $10, %ecx
1448; AVX2-NEXT: orl %edx, %ecx
1449; AVX2-NEXT: vpextrb $11, %xmm0, %edx
1450; AVX2-NEXT: andl $1, %edx
1451; AVX2-NEXT: shll $11, %edx
1452; AVX2-NEXT: orl %ecx, %edx
1453; AVX2-NEXT: vpextrb $12, %xmm0, %ecx
1454; AVX2-NEXT: andl $1, %ecx
1455; AVX2-NEXT: shll $12, %ecx
1456; AVX2-NEXT: orl %edx, %ecx
1457; AVX2-NEXT: vpextrb $13, %xmm0, %edx
1458; AVX2-NEXT: andl $1, %edx
1459; AVX2-NEXT: shll $13, %edx
1460; AVX2-NEXT: orl %ecx, %edx
1461; AVX2-NEXT: vpextrb $14, %xmm0, %ecx
1462; AVX2-NEXT: andl $1, %ecx
1463; AVX2-NEXT: shll $14, %ecx
1464; AVX2-NEXT: orl %edx, %ecx
1465; AVX2-NEXT: vpextrb $15, %xmm0, %edx
1466; AVX2-NEXT: andl $1, %edx
1467; AVX2-NEXT: shll $15, %edx
1468; AVX2-NEXT: orl %ecx, %edx
1469; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1470; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
1471; AVX2-NEXT: andl $1, %ecx
1472; AVX2-NEXT: shll $16, %ecx
1473; AVX2-NEXT: orl %edx, %ecx
1474; AVX2-NEXT: vpextrb $1, %xmm0, %edx
1475; AVX2-NEXT: andl $1, %edx
1476; AVX2-NEXT: shll $17, %edx
1477; AVX2-NEXT: orl %ecx, %edx
1478; AVX2-NEXT: vpextrb $2, %xmm0, %ecx
1479; AVX2-NEXT: andl $1, %ecx
1480; AVX2-NEXT: shll $18, %ecx
1481; AVX2-NEXT: orl %edx, %ecx
1482; AVX2-NEXT: vpextrb $3, %xmm0, %edx
1483; AVX2-NEXT: andl $1, %edx
1484; AVX2-NEXT: shll $19, %edx
1485; AVX2-NEXT: orl %ecx, %edx
1486; AVX2-NEXT: vpextrb $4, %xmm0, %ecx
1487; AVX2-NEXT: andl $1, %ecx
1488; AVX2-NEXT: shll $20, %ecx
1489; AVX2-NEXT: orl %edx, %ecx
1490; AVX2-NEXT: vpextrb $5, %xmm0, %edx
1491; AVX2-NEXT: andl $1, %edx
1492; AVX2-NEXT: shll $21, %edx
1493; AVX2-NEXT: orl %ecx, %edx
1494; AVX2-NEXT: vpextrb $6, %xmm0, %ecx
1495; AVX2-NEXT: andl $1, %ecx
1496; AVX2-NEXT: shll $22, %ecx
1497; AVX2-NEXT: orl %edx, %ecx
1498; AVX2-NEXT: vpextrb $7, %xmm0, %edx
1499; AVX2-NEXT: andl $1, %edx
1500; AVX2-NEXT: shll $23, %edx
1501; AVX2-NEXT: orl %ecx, %edx
1502; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
1503; AVX2-NEXT: andl $1, %ecx
1504; AVX2-NEXT: shll $24, %ecx
1505; AVX2-NEXT: orl %edx, %ecx
1506; AVX2-NEXT: vpextrb $9, %xmm0, %edx
1507; AVX2-NEXT: andl $1, %edx
1508; AVX2-NEXT: shll $25, %edx
1509; AVX2-NEXT: orl %ecx, %edx
1510; AVX2-NEXT: vpextrb $10, %xmm0, %ecx
1511; AVX2-NEXT: andl $1, %ecx
1512; AVX2-NEXT: shll $26, %ecx
1513; AVX2-NEXT: orl %edx, %ecx
1514; AVX2-NEXT: vpextrb $11, %xmm0, %edx
1515; AVX2-NEXT: andl $1, %edx
1516; AVX2-NEXT: shll $27, %edx
1517; AVX2-NEXT: orl %ecx, %edx
1518; AVX2-NEXT: vpextrb $12, %xmm0, %ecx
1519; AVX2-NEXT: andl $1, %ecx
1520; AVX2-NEXT: shll $28, %ecx
1521; AVX2-NEXT: orl %edx, %ecx
1522; AVX2-NEXT: vpextrb $13, %xmm0, %edx
1523; AVX2-NEXT: andl $1, %edx
1524; AVX2-NEXT: shll $29, %edx
1525; AVX2-NEXT: orl %ecx, %edx
1526; AVX2-NEXT: vpextrb $14, %xmm0, %ecx
1527; AVX2-NEXT: andl $1, %ecx
1528; AVX2-NEXT: shll $30, %ecx
1529; AVX2-NEXT: orl %edx, %ecx
1530; AVX2-NEXT: vpextrb $15, %xmm0, %edx
1531; AVX2-NEXT: shll $31, %edx
1532; AVX2-NEXT: orl %ecx, %edx
1533; AVX2-NEXT: orl %eax, %edx
1534; AVX2-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
1535; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1536; AVX2-NEXT: vmovd %xmm0, %ecx
1537; AVX2-NEXT: vpextrd $1, %xmm0, %eax
1538; AVX2-NEXT: addl %ecx, %eax
1539; AVX2-NEXT: vzeroupper
1540; AVX2-NEXT: retq
Simon Pilgrim10daecb2019-04-24 17:25:45 +00001541;
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +00001542; AVX512-LABEL: bitcast_v64i8_to_v2i32:
Simon Pilgrim10daecb2019-04-24 17:25:45 +00001543; AVX512: # %bb.0:
1544; AVX512-NEXT: vpmovb2m %zmm0, %k0
1545; AVX512-NEXT: kmovq %k0, -{{[0-9]+}}(%rsp)
1546; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1547; AVX512-NEXT: vmovd %xmm0, %ecx
1548; AVX512-NEXT: vpextrd $1, %xmm0, %eax
1549; AVX512-NEXT: addl %ecx, %eax
1550; AVX512-NEXT: vzeroupper
1551; AVX512-NEXT: retq
1552 %1 = icmp slt <64 x i8> %a0, zeroinitializer
1553 %2 = bitcast <64 x i1> %1 to <2 x i32>
1554 %3 = extractelement <2 x i32> %2, i32 0
1555 %4 = extractelement <2 x i32> %2, i32 1
1556 %5 = add i32 %3, %4
1557 ret i32 %5
1558}