; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
; Sign-test both i64 lanes, view the result as <2 x i1>, and return the sum
; (i1 add == xor) of the two mask bits.
; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %ecx
; SSE2-SSSE3-NEXT:    movl %ecx, %eax
; SSE2-SSSE3-NEXT:    shrb %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v2i64_to_v2i1:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %xmm0, %ecx
; AVX12-NEXT:    movl %ecx, %eax
; AVX12-NEXT:    shrb %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v2i64_to_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; AVX512-NEXT:    kshiftrw $1, %k0, %k1
; AVX512-NEXT:    kmovd %k1, %ecx
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <2 x i64> %a0, zeroinitializer
  %2 = bitcast <2 x i1> %1 to <2 x i1>
  %3 = extractelement <2 x i1> %2, i32 0
  %4 = extractelement <2 x i1> %2, i32 1
  %5 = add i1 %3, %4
  ret i1 %5
}
46
define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; Sign mask of the four i32 lanes reinterpreted as <2 x i2>; returns the sum
; of the low and high 2-bit halves.
; SSE2-SSSE3-LABEL: bitcast_v4i32_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    andl $3, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    shrl $2, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v4i32_to_v2i2:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrl $2, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $3, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v4i32_to_v2i2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $2, %ecx
; AVX512-NEXT:    andl $3, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $3, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}
99
define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; Sign mask of the eight i16 lanes reinterpreted as <2 x i4>; returns the sum
; of the low and high nibbles.
; SSE2-SSSE3-LABEL: bitcast_v8i16_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    movzbl %al, %ecx
; AVX12-NEXT:    shrl $4, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $15, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %xmm0, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}
152
define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; Sign mask of the sixteen i8 lanes reinterpreted as <2 x i8>; returns the sum
; of the low and high bytes.
; SSE2-LABEL: bitcast_v16i8_to_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_v16i8_to_v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v16i8_to_v2i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    vmovd %eax, %xmm0
; AVX12-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX12-NEXT:    vpextrb $1, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i8_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %xmm0, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i8> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

;
; 256-bit vectors
;

define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; Sign mask of the four i64 lanes reinterpreted as <2 x i2>; returns the sum
; of the low and high 2-bit halves. The pre-SSE4.2 lowering emulates the
; 64-bit signed compare with 32-bit pcmpgtd/pcmpeqd after biasing by 2^31.
; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm4
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    por %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrl $2, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $3, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v4i64_to_v2i2:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %ymm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrl $2, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $3, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v4i64_to_v2i2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $2, %ecx
; AVX512-NEXT:    andl $3, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $3, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <4 x i64> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}
279
define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; Sign mask of the eight i32 lanes reinterpreted as <2 x i4>; returns the sum
; of the low and high nibbles.
; SSE2-SSSE3-LABEL: bitcast_v8i32_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i32_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskps %ymm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrl $4, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $15, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i32_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}
335
define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; Sign mask of the sixteen i16 lanes reinterpreted as <2 x i8>; returns the
; sum of the low and high bytes.
; SSE2-LABEL: bitcast_v16i16_to_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    packsswb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_v16i16_to_v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i16_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i16_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i16_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %ymm0, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i16> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}
407
define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; Sign mask of the thirty-two i8 lanes reinterpreted as <2 x i16>; returns the
; sum of the low and high 16-bit halves.
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movd %ecx, %xmm0
; SSE2-SSSE3-NEXT:    pextrw $0, %xmm0, %ecx
; SSE2-SSSE3-NEXT:    pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrw $1, %xmm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    pushq %rbp
; AVX512-NEXT:    movq %rsp, %rbp
; AVX512-NEXT:    andq $-32, %rsp
; AVX512-NEXT:    subq $32, %rsp
; AVX512-NEXT:    vpmovb2m %ymm0, %k0
; AVX512-NEXT:    kmovd %k0, (%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    movq %rbp, %rsp
; AVX512-NEXT:    popq %rbp
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i8> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

;
; 512-bit vectors
;

define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; Sign mask of the eight i64 lanes reinterpreted as <2 x i4>; returns the sum
; of the low and high nibbles. The pre-SSE4.2 lowering emulates each 64-bit
; signed compare with 32-bit pcmpgtd/pcmpeqd after biasing by 2^31, then
; narrows the four compare results with packssdw/packsswb.
; SSE2-SSSE3-LABEL: bitcast_v8i64_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm7
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm7, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v8i64_to_v2i4:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $4, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v8i64_to_v2i4:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $4, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm0
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i64_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i64> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}
601
define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; Sign mask of the sixteen i32 lanes reinterpreted as <2 x i8>; returns the
; sum of the low and high bytes.
; SSE2-LABEL: bitcast_v16i32_to_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    packssdw %xmm3, %xmm2
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    packsswb %xmm2, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_v16i32_to_v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSSE3-NEXT:    packsswb %xmm2, %xmm0
; SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i32_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i32_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i32_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i32> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}
684
685define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
686; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
687; SSE2-SSSE3: # %bb.0:
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000688; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
689; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
690; SSE2-SSSE3-NEXT: packsswb %xmm3, %xmm2
691; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %ecx
692; SSE2-SSSE3-NEXT: shll $16, %ecx
693; SSE2-SSSE3-NEXT: orl %eax, %ecx
694; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
695; SSE2-SSSE3-NEXT: pextrw $0, %xmm0, %ecx
696; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
697; SSE2-SSSE3-NEXT: addl %ecx, %eax
698; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000699; SSE2-SSSE3-NEXT: retq
700;
701; AVX1-LABEL: bitcast_v32i16_to_v2i16:
702; AVX1: # %bb.0:
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000703; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
704; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
705; AVX1-NEXT: vpmovmskb %xmm0, %eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000706; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000707; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
708; AVX1-NEXT: vpmovmskb %xmm0, %ecx
709; AVX1-NEXT: shll $16, %ecx
710; AVX1-NEXT: orl %eax, %ecx
711; AVX1-NEXT: vmovd %ecx, %xmm0
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000712; AVX1-NEXT: vpextrw $0, %xmm0, %ecx
713; AVX1-NEXT: vpextrw $1, %xmm0, %eax
714; AVX1-NEXT: addl %ecx, %eax
715; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000716; AVX1-NEXT: vzeroupper
717; AVX1-NEXT: retq
718;
719; AVX2-LABEL: bitcast_v32i16_to_v2i16:
720; AVX2: # %bb.0:
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000721; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
722; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
723; AVX2-NEXT: vpmovmskb %ymm0, %eax
724; AVX2-NEXT: vmovd %eax, %xmm0
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000725; AVX2-NEXT: vpextrw $0, %xmm0, %ecx
726; AVX2-NEXT: vpextrw $1, %xmm0, %eax
727; AVX2-NEXT: addl %ecx, %eax
728; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000729; AVX2-NEXT: vzeroupper
730; AVX2-NEXT: retq
731;
732; AVX512-LABEL: bitcast_v32i16_to_v2i16:
733; AVX512: # %bb.0:
734; AVX512-NEXT: pushq %rbp
735; AVX512-NEXT: movq %rsp, %rbp
736; AVX512-NEXT: andq $-32, %rsp
737; AVX512-NEXT: subq $32, %rsp
738; AVX512-NEXT: vpmovw2m %zmm0, %k0
739; AVX512-NEXT: kmovd %k0, (%rsp)
740; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
741; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
742; AVX512-NEXT: vpextrw $1, %xmm0, %eax
743; AVX512-NEXT: addl %ecx, %eax
744; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
745; AVX512-NEXT: movq %rbp, %rsp
746; AVX512-NEXT: popq %rbp
747; AVX512-NEXT: vzeroupper
748; AVX512-NEXT: retq
749 %1 = icmp slt <32 x i16> %a0, zeroinitializer
750 %2 = bitcast <32 x i1> %1 to <2 x i16>
751 %3 = extractelement <2 x i16> %2, i32 0
752 %4 = extractelement <2 x i16> %2, i32 1
753 %5 = add i16 %3, %4
754 ret i16 %5
755}
756
; Bitcast the <64 x i1> sign-mask of a <64 x i8> compare-with-zero to
; <2 x i32> and sum the two i32 halves. Pre-AVX512 targets have no 64-bit mask
; register, so the mask is assembled on the stack: SSE2/SSSE3 spill each
; pcmpgtb result and rebuild 16 bits at a time (movzbl/andl/shll/orl), while
; AVX1/AVX2 extract every byte lane with vpextrb and OR the bits together,
; storing two 32-bit words that are then reloaded as a vector. AVX512 simply
; uses vpmovb2m + kmovq. NOTE: CHECK lines are autogenerated — do not
; hand-edit; rerun utils/update_llc_test_checks.py after codegen changes.
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000757define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
758; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000759; SSE2-SSSE3: # %bb.0:
Craig Topperf9c30ed2019-04-25 18:19:59 +0000760; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm4
761; SSE2-SSSE3-NEXT: pxor %xmm5, %xmm5
762; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm5
763; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp)
764; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm3
765; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm3
766; SSE2-SSSE3-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp)
767; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm2
768; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm2
769; SSE2-SSSE3-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp)
770; SSE2-SSSE3-NEXT: pcmpgtb %xmm0, %xmm4
771; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp)
772; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
773; SSE2-SSSE3-NEXT: andl $1, %eax
774; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
775; SSE2-SSSE3-NEXT: andl $1, %ecx
776; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
777; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
778; SSE2-SSSE3-NEXT: andl $1, %ecx
779; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
780; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
781; SSE2-SSSE3-NEXT: andl $1, %ecx
782; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
783; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
784; SSE2-SSSE3-NEXT: andl $1, %ecx
785; SSE2-SSSE3-NEXT: shll $4, %ecx
786; SSE2-SSSE3-NEXT: orl %eax, %ecx
787; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
788; SSE2-SSSE3-NEXT: andl $1, %eax
789; SSE2-SSSE3-NEXT: shll $5, %eax
790; SSE2-SSSE3-NEXT: orl %ecx, %eax
791; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
792; SSE2-SSSE3-NEXT: andl $1, %ecx
793; SSE2-SSSE3-NEXT: shll $6, %ecx
794; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
795; SSE2-SSSE3-NEXT: andl $1, %edx
796; SSE2-SSSE3-NEXT: shll $7, %edx
797; SSE2-SSSE3-NEXT: orl %ecx, %edx
798; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
799; SSE2-SSSE3-NEXT: andl $1, %ecx
800; SSE2-SSSE3-NEXT: shll $8, %ecx
801; SSE2-SSSE3-NEXT: orl %edx, %ecx
802; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
803; SSE2-SSSE3-NEXT: andl $1, %edx
804; SSE2-SSSE3-NEXT: shll $9, %edx
805; SSE2-SSSE3-NEXT: orl %ecx, %edx
806; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
807; SSE2-SSSE3-NEXT: andl $1, %ecx
808; SSE2-SSSE3-NEXT: shll $10, %ecx
809; SSE2-SSSE3-NEXT: orl %edx, %ecx
810; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
811; SSE2-SSSE3-NEXT: andl $1, %edx
812; SSE2-SSSE3-NEXT: shll $11, %edx
813; SSE2-SSSE3-NEXT: orl %ecx, %edx
814; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
815; SSE2-SSSE3-NEXT: andl $1, %ecx
816; SSE2-SSSE3-NEXT: shll $12, %ecx
817; SSE2-SSSE3-NEXT: orl %edx, %ecx
818; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
819; SSE2-SSSE3-NEXT: andl $1, %edx
820; SSE2-SSSE3-NEXT: shll $13, %edx
821; SSE2-SSSE3-NEXT: orl %ecx, %edx
822; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
823; SSE2-SSSE3-NEXT: andl $1, %ecx
824; SSE2-SSSE3-NEXT: shll $14, %ecx
825; SSE2-SSSE3-NEXT: orl %edx, %ecx
826; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
827; SSE2-SSSE3-NEXT: shll $15, %edx
828; SSE2-SSSE3-NEXT: orl %ecx, %edx
829; SSE2-SSSE3-NEXT: orl %eax, %edx
830; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
831; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
832; SSE2-SSSE3-NEXT: andl $1, %eax
833; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
834; SSE2-SSSE3-NEXT: andl $1, %ecx
835; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
836; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
837; SSE2-SSSE3-NEXT: andl $1, %ecx
838; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
839; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
840; SSE2-SSSE3-NEXT: andl $1, %ecx
841; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
842; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
843; SSE2-SSSE3-NEXT: andl $1, %ecx
844; SSE2-SSSE3-NEXT: shll $4, %ecx
845; SSE2-SSSE3-NEXT: orl %eax, %ecx
846; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
847; SSE2-SSSE3-NEXT: andl $1, %eax
848; SSE2-SSSE3-NEXT: shll $5, %eax
849; SSE2-SSSE3-NEXT: orl %ecx, %eax
850; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
851; SSE2-SSSE3-NEXT: andl $1, %ecx
852; SSE2-SSSE3-NEXT: shll $6, %ecx
853; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
854; SSE2-SSSE3-NEXT: andl $1, %edx
855; SSE2-SSSE3-NEXT: shll $7, %edx
856; SSE2-SSSE3-NEXT: orl %ecx, %edx
857; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
858; SSE2-SSSE3-NEXT: andl $1, %ecx
859; SSE2-SSSE3-NEXT: shll $8, %ecx
860; SSE2-SSSE3-NEXT: orl %edx, %ecx
861; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
862; SSE2-SSSE3-NEXT: andl $1, %edx
863; SSE2-SSSE3-NEXT: shll $9, %edx
864; SSE2-SSSE3-NEXT: orl %ecx, %edx
865; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
866; SSE2-SSSE3-NEXT: andl $1, %ecx
867; SSE2-SSSE3-NEXT: shll $10, %ecx
868; SSE2-SSSE3-NEXT: orl %edx, %ecx
869; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
870; SSE2-SSSE3-NEXT: andl $1, %edx
871; SSE2-SSSE3-NEXT: shll $11, %edx
872; SSE2-SSSE3-NEXT: orl %ecx, %edx
873; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
874; SSE2-SSSE3-NEXT: andl $1, %ecx
875; SSE2-SSSE3-NEXT: shll $12, %ecx
876; SSE2-SSSE3-NEXT: orl %edx, %ecx
877; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
878; SSE2-SSSE3-NEXT: andl $1, %edx
879; SSE2-SSSE3-NEXT: shll $13, %edx
880; SSE2-SSSE3-NEXT: orl %ecx, %edx
881; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
882; SSE2-SSSE3-NEXT: andl $1, %ecx
883; SSE2-SSSE3-NEXT: shll $14, %ecx
884; SSE2-SSSE3-NEXT: orl %edx, %ecx
885; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
886; SSE2-SSSE3-NEXT: shll $15, %edx
887; SSE2-SSSE3-NEXT: orl %ecx, %edx
888; SSE2-SSSE3-NEXT: orl %eax, %edx
889; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
890; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
891; SSE2-SSSE3-NEXT: andl $1, %eax
892; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
893; SSE2-SSSE3-NEXT: andl $1, %ecx
894; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
895; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
896; SSE2-SSSE3-NEXT: andl $1, %ecx
897; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
898; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
899; SSE2-SSSE3-NEXT: andl $1, %ecx
900; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
901; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
902; SSE2-SSSE3-NEXT: andl $1, %ecx
903; SSE2-SSSE3-NEXT: shll $4, %ecx
904; SSE2-SSSE3-NEXT: orl %eax, %ecx
905; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
906; SSE2-SSSE3-NEXT: andl $1, %eax
907; SSE2-SSSE3-NEXT: shll $5, %eax
908; SSE2-SSSE3-NEXT: orl %ecx, %eax
909; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
910; SSE2-SSSE3-NEXT: andl $1, %ecx
911; SSE2-SSSE3-NEXT: shll $6, %ecx
912; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
913; SSE2-SSSE3-NEXT: andl $1, %edx
914; SSE2-SSSE3-NEXT: shll $7, %edx
915; SSE2-SSSE3-NEXT: orl %ecx, %edx
916; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
917; SSE2-SSSE3-NEXT: andl $1, %ecx
918; SSE2-SSSE3-NEXT: shll $8, %ecx
919; SSE2-SSSE3-NEXT: orl %edx, %ecx
920; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
921; SSE2-SSSE3-NEXT: andl $1, %edx
922; SSE2-SSSE3-NEXT: shll $9, %edx
923; SSE2-SSSE3-NEXT: orl %ecx, %edx
924; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
925; SSE2-SSSE3-NEXT: andl $1, %ecx
926; SSE2-SSSE3-NEXT: shll $10, %ecx
927; SSE2-SSSE3-NEXT: orl %edx, %ecx
928; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
929; SSE2-SSSE3-NEXT: andl $1, %edx
930; SSE2-SSSE3-NEXT: shll $11, %edx
931; SSE2-SSSE3-NEXT: orl %ecx, %edx
932; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
933; SSE2-SSSE3-NEXT: andl $1, %ecx
934; SSE2-SSSE3-NEXT: shll $12, %ecx
935; SSE2-SSSE3-NEXT: orl %edx, %ecx
936; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
937; SSE2-SSSE3-NEXT: andl $1, %edx
938; SSE2-SSSE3-NEXT: shll $13, %edx
939; SSE2-SSSE3-NEXT: orl %ecx, %edx
940; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
941; SSE2-SSSE3-NEXT: andl $1, %ecx
942; SSE2-SSSE3-NEXT: shll $14, %ecx
943; SSE2-SSSE3-NEXT: orl %edx, %ecx
944; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
945; SSE2-SSSE3-NEXT: shll $15, %edx
946; SSE2-SSSE3-NEXT: orl %ecx, %edx
947; SSE2-SSSE3-NEXT: orl %eax, %edx
948; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
949; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
950; SSE2-SSSE3-NEXT: andl $1, %eax
951; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
952; SSE2-SSSE3-NEXT: andl $1, %ecx
953; SSE2-SSSE3-NEXT: leal (%rcx,%rax,2), %eax
954; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
955; SSE2-SSSE3-NEXT: andl $1, %ecx
956; SSE2-SSSE3-NEXT: leal (%rax,%rcx,4), %eax
957; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
958; SSE2-SSSE3-NEXT: andl $1, %ecx
959; SSE2-SSSE3-NEXT: leal (%rax,%rcx,8), %eax
960; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
961; SSE2-SSSE3-NEXT: andl $1, %ecx
962; SSE2-SSSE3-NEXT: shll $4, %ecx
963; SSE2-SSSE3-NEXT: orl %eax, %ecx
964; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
965; SSE2-SSSE3-NEXT: andl $1, %eax
966; SSE2-SSSE3-NEXT: shll $5, %eax
967; SSE2-SSSE3-NEXT: orl %ecx, %eax
968; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
969; SSE2-SSSE3-NEXT: andl $1, %ecx
970; SSE2-SSSE3-NEXT: shll $6, %ecx
971; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
972; SSE2-SSSE3-NEXT: andl $1, %edx
973; SSE2-SSSE3-NEXT: shll $7, %edx
974; SSE2-SSSE3-NEXT: orl %ecx, %edx
975; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
976; SSE2-SSSE3-NEXT: andl $1, %ecx
977; SSE2-SSSE3-NEXT: shll $8, %ecx
978; SSE2-SSSE3-NEXT: orl %edx, %ecx
979; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
980; SSE2-SSSE3-NEXT: andl $1, %edx
981; SSE2-SSSE3-NEXT: shll $9, %edx
982; SSE2-SSSE3-NEXT: orl %ecx, %edx
983; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
984; SSE2-SSSE3-NEXT: andl $1, %ecx
985; SSE2-SSSE3-NEXT: shll $10, %ecx
986; SSE2-SSSE3-NEXT: orl %edx, %ecx
987; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
988; SSE2-SSSE3-NEXT: andl $1, %edx
989; SSE2-SSSE3-NEXT: shll $11, %edx
990; SSE2-SSSE3-NEXT: orl %ecx, %edx
991; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
992; SSE2-SSSE3-NEXT: andl $1, %ecx
993; SSE2-SSSE3-NEXT: shll $12, %ecx
994; SSE2-SSSE3-NEXT: orl %edx, %ecx
995; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
996; SSE2-SSSE3-NEXT: andl $1, %edx
997; SSE2-SSSE3-NEXT: shll $13, %edx
998; SSE2-SSSE3-NEXT: orl %ecx, %edx
999; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
1000; SSE2-SSSE3-NEXT: andl $1, %ecx
1001; SSE2-SSSE3-NEXT: shll $14, %ecx
1002; SSE2-SSSE3-NEXT: orl %edx, %ecx
1003; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
1004; SSE2-SSSE3-NEXT: shll $15, %edx
1005; SSE2-SSSE3-NEXT: orl %ecx, %edx
1006; SSE2-SSSE3-NEXT: orl %eax, %edx
1007; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
1008; SSE2-SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1009; SSE2-SSSE3-NEXT: movd %xmm0, %ecx
1010; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,0,1]
1011; SSE2-SSSE3-NEXT: movd %xmm0, %eax
1012; SSE2-SSSE3-NEXT: addl %ecx, %eax
Simon Pilgrim10daecb2019-04-24 17:25:45 +00001013; SSE2-SSSE3-NEXT: retq
1014;
Craig Topperf9c30ed2019-04-25 18:19:59 +00001015; AVX1-LABEL: bitcast_v64i8_to_v2i32:
1016; AVX1: # %bb.0:
1017; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1018; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm3
1019; AVX1-NEXT: vpextrb $1, %xmm3, %eax
1020; AVX1-NEXT: andl $1, %eax
1021; AVX1-NEXT: vpextrb $0, %xmm3, %ecx
1022; AVX1-NEXT: andl $1, %ecx
1023; AVX1-NEXT: leal (%rcx,%rax,2), %eax
1024; AVX1-NEXT: vpextrb $2, %xmm3, %ecx
1025; AVX1-NEXT: andl $1, %ecx
1026; AVX1-NEXT: leal (%rax,%rcx,4), %eax
1027; AVX1-NEXT: vpextrb $3, %xmm3, %ecx
1028; AVX1-NEXT: andl $1, %ecx
1029; AVX1-NEXT: leal (%rax,%rcx,8), %eax
1030; AVX1-NEXT: vpextrb $4, %xmm3, %ecx
1031; AVX1-NEXT: andl $1, %ecx
1032; AVX1-NEXT: shll $4, %ecx
1033; AVX1-NEXT: orl %eax, %ecx
1034; AVX1-NEXT: vpextrb $5, %xmm3, %eax
1035; AVX1-NEXT: andl $1, %eax
1036; AVX1-NEXT: shll $5, %eax
1037; AVX1-NEXT: orl %ecx, %eax
1038; AVX1-NEXT: vpextrb $6, %xmm3, %ecx
1039; AVX1-NEXT: andl $1, %ecx
1040; AVX1-NEXT: shll $6, %ecx
1041; AVX1-NEXT: vpextrb $7, %xmm3, %edx
1042; AVX1-NEXT: andl $1, %edx
1043; AVX1-NEXT: shll $7, %edx
1044; AVX1-NEXT: orl %ecx, %edx
1045; AVX1-NEXT: vpextrb $8, %xmm3, %ecx
1046; AVX1-NEXT: andl $1, %ecx
1047; AVX1-NEXT: shll $8, %ecx
1048; AVX1-NEXT: orl %edx, %ecx
1049; AVX1-NEXT: vpextrb $9, %xmm3, %edx
1050; AVX1-NEXT: andl $1, %edx
1051; AVX1-NEXT: shll $9, %edx
1052; AVX1-NEXT: orl %ecx, %edx
1053; AVX1-NEXT: vpextrb $10, %xmm3, %ecx
1054; AVX1-NEXT: andl $1, %ecx
1055; AVX1-NEXT: shll $10, %ecx
1056; AVX1-NEXT: orl %edx, %ecx
1057; AVX1-NEXT: vpextrb $11, %xmm3, %edx
1058; AVX1-NEXT: andl $1, %edx
1059; AVX1-NEXT: shll $11, %edx
1060; AVX1-NEXT: orl %ecx, %edx
1061; AVX1-NEXT: vpextrb $12, %xmm3, %ecx
1062; AVX1-NEXT: andl $1, %ecx
1063; AVX1-NEXT: shll $12, %ecx
1064; AVX1-NEXT: orl %edx, %ecx
1065; AVX1-NEXT: vpextrb $13, %xmm3, %edx
1066; AVX1-NEXT: andl $1, %edx
1067; AVX1-NEXT: shll $13, %edx
1068; AVX1-NEXT: orl %ecx, %edx
1069; AVX1-NEXT: vpextrb $14, %xmm3, %ecx
1070; AVX1-NEXT: andl $1, %ecx
1071; AVX1-NEXT: shll $14, %ecx
1072; AVX1-NEXT: orl %edx, %ecx
1073; AVX1-NEXT: vpextrb $15, %xmm3, %edx
1074; AVX1-NEXT: andl $1, %edx
1075; AVX1-NEXT: shll $15, %edx
1076; AVX1-NEXT: orl %ecx, %edx
1077; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1078; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
1079; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
1080; AVX1-NEXT: andl $1, %ecx
1081; AVX1-NEXT: shll $16, %ecx
1082; AVX1-NEXT: orl %edx, %ecx
1083; AVX1-NEXT: vpextrb $1, %xmm1, %edx
1084; AVX1-NEXT: andl $1, %edx
1085; AVX1-NEXT: shll $17, %edx
1086; AVX1-NEXT: orl %ecx, %edx
1087; AVX1-NEXT: vpextrb $2, %xmm1, %ecx
1088; AVX1-NEXT: andl $1, %ecx
1089; AVX1-NEXT: shll $18, %ecx
1090; AVX1-NEXT: orl %edx, %ecx
1091; AVX1-NEXT: vpextrb $3, %xmm1, %edx
1092; AVX1-NEXT: andl $1, %edx
1093; AVX1-NEXT: shll $19, %edx
1094; AVX1-NEXT: orl %ecx, %edx
1095; AVX1-NEXT: vpextrb $4, %xmm1, %ecx
1096; AVX1-NEXT: andl $1, %ecx
1097; AVX1-NEXT: shll $20, %ecx
1098; AVX1-NEXT: orl %edx, %ecx
1099; AVX1-NEXT: vpextrb $5, %xmm1, %edx
1100; AVX1-NEXT: andl $1, %edx
1101; AVX1-NEXT: shll $21, %edx
1102; AVX1-NEXT: orl %ecx, %edx
1103; AVX1-NEXT: vpextrb $6, %xmm1, %ecx
1104; AVX1-NEXT: andl $1, %ecx
1105; AVX1-NEXT: shll $22, %ecx
1106; AVX1-NEXT: orl %edx, %ecx
1107; AVX1-NEXT: vpextrb $7, %xmm1, %edx
1108; AVX1-NEXT: andl $1, %edx
1109; AVX1-NEXT: shll $23, %edx
1110; AVX1-NEXT: orl %ecx, %edx
1111; AVX1-NEXT: vpextrb $8, %xmm1, %ecx
1112; AVX1-NEXT: andl $1, %ecx
1113; AVX1-NEXT: shll $24, %ecx
1114; AVX1-NEXT: orl %edx, %ecx
1115; AVX1-NEXT: vpextrb $9, %xmm1, %edx
1116; AVX1-NEXT: andl $1, %edx
1117; AVX1-NEXT: shll $25, %edx
1118; AVX1-NEXT: orl %ecx, %edx
1119; AVX1-NEXT: vpextrb $10, %xmm1, %ecx
1120; AVX1-NEXT: andl $1, %ecx
1121; AVX1-NEXT: shll $26, %ecx
1122; AVX1-NEXT: orl %edx, %ecx
1123; AVX1-NEXT: vpextrb $11, %xmm1, %edx
1124; AVX1-NEXT: andl $1, %edx
1125; AVX1-NEXT: shll $27, %edx
1126; AVX1-NEXT: orl %ecx, %edx
1127; AVX1-NEXT: vpextrb $12, %xmm1, %ecx
1128; AVX1-NEXT: andl $1, %ecx
1129; AVX1-NEXT: shll $28, %ecx
1130; AVX1-NEXT: orl %edx, %ecx
1131; AVX1-NEXT: vpextrb $13, %xmm1, %edx
1132; AVX1-NEXT: andl $1, %edx
1133; AVX1-NEXT: shll $29, %edx
1134; AVX1-NEXT: orl %ecx, %edx
1135; AVX1-NEXT: vpextrb $14, %xmm1, %ecx
1136; AVX1-NEXT: andl $1, %ecx
1137; AVX1-NEXT: shll $30, %ecx
1138; AVX1-NEXT: orl %edx, %ecx
1139; AVX1-NEXT: vpextrb $15, %xmm1, %edx
1140; AVX1-NEXT: shll $31, %edx
1141; AVX1-NEXT: orl %ecx, %edx
1142; AVX1-NEXT: orl %eax, %edx
1143; AVX1-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
1144; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm1
1145; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1146; AVX1-NEXT: andl $1, %eax
1147; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
1148; AVX1-NEXT: andl $1, %ecx
1149; AVX1-NEXT: leal (%rcx,%rax,2), %eax
1150; AVX1-NEXT: vpextrb $2, %xmm1, %ecx
1151; AVX1-NEXT: andl $1, %ecx
1152; AVX1-NEXT: leal (%rax,%rcx,4), %eax
1153; AVX1-NEXT: vpextrb $3, %xmm1, %ecx
1154; AVX1-NEXT: andl $1, %ecx
1155; AVX1-NEXT: leal (%rax,%rcx,8), %eax
1156; AVX1-NEXT: vpextrb $4, %xmm1, %ecx
1157; AVX1-NEXT: andl $1, %ecx
1158; AVX1-NEXT: shll $4, %ecx
1159; AVX1-NEXT: orl %eax, %ecx
1160; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1161; AVX1-NEXT: andl $1, %eax
1162; AVX1-NEXT: shll $5, %eax
1163; AVX1-NEXT: orl %ecx, %eax
1164; AVX1-NEXT: vpextrb $6, %xmm1, %ecx
1165; AVX1-NEXT: andl $1, %ecx
1166; AVX1-NEXT: shll $6, %ecx
1167; AVX1-NEXT: vpextrb $7, %xmm1, %edx
1168; AVX1-NEXT: andl $1, %edx
1169; AVX1-NEXT: shll $7, %edx
1170; AVX1-NEXT: orl %ecx, %edx
1171; AVX1-NEXT: vpextrb $8, %xmm1, %ecx
1172; AVX1-NEXT: andl $1, %ecx
1173; AVX1-NEXT: shll $8, %ecx
1174; AVX1-NEXT: orl %edx, %ecx
1175; AVX1-NEXT: vpextrb $9, %xmm1, %edx
1176; AVX1-NEXT: andl $1, %edx
1177; AVX1-NEXT: shll $9, %edx
1178; AVX1-NEXT: orl %ecx, %edx
1179; AVX1-NEXT: vpextrb $10, %xmm1, %ecx
1180; AVX1-NEXT: andl $1, %ecx
1181; AVX1-NEXT: shll $10, %ecx
1182; AVX1-NEXT: orl %edx, %ecx
1183; AVX1-NEXT: vpextrb $11, %xmm1, %edx
1184; AVX1-NEXT: andl $1, %edx
1185; AVX1-NEXT: shll $11, %edx
1186; AVX1-NEXT: orl %ecx, %edx
1187; AVX1-NEXT: vpextrb $12, %xmm1, %ecx
1188; AVX1-NEXT: andl $1, %ecx
1189; AVX1-NEXT: shll $12, %ecx
1190; AVX1-NEXT: orl %edx, %ecx
1191; AVX1-NEXT: vpextrb $13, %xmm1, %edx
1192; AVX1-NEXT: andl $1, %edx
1193; AVX1-NEXT: shll $13, %edx
1194; AVX1-NEXT: orl %ecx, %edx
1195; AVX1-NEXT: vpextrb $14, %xmm1, %ecx
1196; AVX1-NEXT: andl $1, %ecx
1197; AVX1-NEXT: shll $14, %ecx
1198; AVX1-NEXT: orl %edx, %ecx
1199; AVX1-NEXT: vpextrb $15, %xmm1, %edx
1200; AVX1-NEXT: andl $1, %edx
1201; AVX1-NEXT: shll $15, %edx
1202; AVX1-NEXT: orl %ecx, %edx
1203; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1204; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
1205; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
1206; AVX1-NEXT: andl $1, %ecx
1207; AVX1-NEXT: shll $16, %ecx
1208; AVX1-NEXT: orl %edx, %ecx
1209; AVX1-NEXT: vpextrb $1, %xmm0, %edx
1210; AVX1-NEXT: andl $1, %edx
1211; AVX1-NEXT: shll $17, %edx
1212; AVX1-NEXT: orl %ecx, %edx
1213; AVX1-NEXT: vpextrb $2, %xmm0, %ecx
1214; AVX1-NEXT: andl $1, %ecx
1215; AVX1-NEXT: shll $18, %ecx
1216; AVX1-NEXT: orl %edx, %ecx
1217; AVX1-NEXT: vpextrb $3, %xmm0, %edx
1218; AVX1-NEXT: andl $1, %edx
1219; AVX1-NEXT: shll $19, %edx
1220; AVX1-NEXT: orl %ecx, %edx
1221; AVX1-NEXT: vpextrb $4, %xmm0, %ecx
1222; AVX1-NEXT: andl $1, %ecx
1223; AVX1-NEXT: shll $20, %ecx
1224; AVX1-NEXT: orl %edx, %ecx
1225; AVX1-NEXT: vpextrb $5, %xmm0, %edx
1226; AVX1-NEXT: andl $1, %edx
1227; AVX1-NEXT: shll $21, %edx
1228; AVX1-NEXT: orl %ecx, %edx
1229; AVX1-NEXT: vpextrb $6, %xmm0, %ecx
1230; AVX1-NEXT: andl $1, %ecx
1231; AVX1-NEXT: shll $22, %ecx
1232; AVX1-NEXT: orl %edx, %ecx
1233; AVX1-NEXT: vpextrb $7, %xmm0, %edx
1234; AVX1-NEXT: andl $1, %edx
1235; AVX1-NEXT: shll $23, %edx
1236; AVX1-NEXT: orl %ecx, %edx
1237; AVX1-NEXT: vpextrb $8, %xmm0, %ecx
1238; AVX1-NEXT: andl $1, %ecx
1239; AVX1-NEXT: shll $24, %ecx
1240; AVX1-NEXT: orl %edx, %ecx
1241; AVX1-NEXT: vpextrb $9, %xmm0, %edx
1242; AVX1-NEXT: andl $1, %edx
1243; AVX1-NEXT: shll $25, %edx
1244; AVX1-NEXT: orl %ecx, %edx
1245; AVX1-NEXT: vpextrb $10, %xmm0, %ecx
1246; AVX1-NEXT: andl $1, %ecx
1247; AVX1-NEXT: shll $26, %ecx
1248; AVX1-NEXT: orl %edx, %ecx
1249; AVX1-NEXT: vpextrb $11, %xmm0, %edx
1250; AVX1-NEXT: andl $1, %edx
1251; AVX1-NEXT: shll $27, %edx
1252; AVX1-NEXT: orl %ecx, %edx
1253; AVX1-NEXT: vpextrb $12, %xmm0, %ecx
1254; AVX1-NEXT: andl $1, %ecx
1255; AVX1-NEXT: shll $28, %ecx
1256; AVX1-NEXT: orl %edx, %ecx
1257; AVX1-NEXT: vpextrb $13, %xmm0, %edx
1258; AVX1-NEXT: andl $1, %edx
1259; AVX1-NEXT: shll $29, %edx
1260; AVX1-NEXT: orl %ecx, %edx
1261; AVX1-NEXT: vpextrb $14, %xmm0, %ecx
1262; AVX1-NEXT: andl $1, %ecx
1263; AVX1-NEXT: shll $30, %ecx
1264; AVX1-NEXT: orl %edx, %ecx
1265; AVX1-NEXT: vpextrb $15, %xmm0, %edx
1266; AVX1-NEXT: shll $31, %edx
1267; AVX1-NEXT: orl %ecx, %edx
1268; AVX1-NEXT: orl %eax, %edx
1269; AVX1-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
1270; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1271; AVX1-NEXT: vmovd %xmm0, %ecx
1272; AVX1-NEXT: vpextrd $1, %xmm0, %eax
1273; AVX1-NEXT: addl %ecx, %eax
1274; AVX1-NEXT: vzeroupper
1275; AVX1-NEXT: retq
1276;
1277; AVX2-LABEL: bitcast_v64i8_to_v2i32:
1278; AVX2: # %bb.0:
1279; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1280; AVX2-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm1
1281; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1282; AVX2-NEXT: andl $1, %eax
1283; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
1284; AVX2-NEXT: andl $1, %ecx
1285; AVX2-NEXT: leal (%rcx,%rax,2), %eax
1286; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
1287; AVX2-NEXT: andl $1, %ecx
1288; AVX2-NEXT: leal (%rax,%rcx,4), %eax
1289; AVX2-NEXT: vpextrb $3, %xmm1, %ecx
1290; AVX2-NEXT: andl $1, %ecx
1291; AVX2-NEXT: leal (%rax,%rcx,8), %eax
1292; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
1293; AVX2-NEXT: andl $1, %ecx
1294; AVX2-NEXT: shll $4, %ecx
1295; AVX2-NEXT: orl %eax, %ecx
1296; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1297; AVX2-NEXT: andl $1, %eax
1298; AVX2-NEXT: shll $5, %eax
1299; AVX2-NEXT: orl %ecx, %eax
1300; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
1301; AVX2-NEXT: andl $1, %ecx
1302; AVX2-NEXT: shll $6, %ecx
1303; AVX2-NEXT: vpextrb $7, %xmm1, %edx
1304; AVX2-NEXT: andl $1, %edx
1305; AVX2-NEXT: shll $7, %edx
1306; AVX2-NEXT: orl %ecx, %edx
1307; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
1308; AVX2-NEXT: andl $1, %ecx
1309; AVX2-NEXT: shll $8, %ecx
1310; AVX2-NEXT: orl %edx, %ecx
1311; AVX2-NEXT: vpextrb $9, %xmm1, %edx
1312; AVX2-NEXT: andl $1, %edx
1313; AVX2-NEXT: shll $9, %edx
1314; AVX2-NEXT: orl %ecx, %edx
1315; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
1316; AVX2-NEXT: andl $1, %ecx
1317; AVX2-NEXT: shll $10, %ecx
1318; AVX2-NEXT: orl %edx, %ecx
1319; AVX2-NEXT: vpextrb $11, %xmm1, %edx
1320; AVX2-NEXT: andl $1, %edx
1321; AVX2-NEXT: shll $11, %edx
1322; AVX2-NEXT: orl %ecx, %edx
1323; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
1324; AVX2-NEXT: andl $1, %ecx
1325; AVX2-NEXT: shll $12, %ecx
1326; AVX2-NEXT: orl %edx, %ecx
1327; AVX2-NEXT: vpextrb $13, %xmm1, %edx
1328; AVX2-NEXT: andl $1, %edx
1329; AVX2-NEXT: shll $13, %edx
1330; AVX2-NEXT: orl %ecx, %edx
1331; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
1332; AVX2-NEXT: andl $1, %ecx
1333; AVX2-NEXT: shll $14, %ecx
1334; AVX2-NEXT: orl %edx, %ecx
1335; AVX2-NEXT: vpextrb $15, %xmm1, %edx
1336; AVX2-NEXT: andl $1, %edx
1337; AVX2-NEXT: shll $15, %edx
1338; AVX2-NEXT: orl %ecx, %edx
1339; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
1340; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
1341; AVX2-NEXT: andl $1, %ecx
1342; AVX2-NEXT: shll $16, %ecx
1343; AVX2-NEXT: orl %edx, %ecx
1344; AVX2-NEXT: vpextrb $1, %xmm1, %edx
1345; AVX2-NEXT: andl $1, %edx
1346; AVX2-NEXT: shll $17, %edx
1347; AVX2-NEXT: orl %ecx, %edx
1348; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
1349; AVX2-NEXT: andl $1, %ecx
1350; AVX2-NEXT: shll $18, %ecx
1351; AVX2-NEXT: orl %edx, %ecx
1352; AVX2-NEXT: vpextrb $3, %xmm1, %edx
1353; AVX2-NEXT: andl $1, %edx
1354; AVX2-NEXT: shll $19, %edx
1355; AVX2-NEXT: orl %ecx, %edx
1356; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
1357; AVX2-NEXT: andl $1, %ecx
1358; AVX2-NEXT: shll $20, %ecx
1359; AVX2-NEXT: orl %edx, %ecx
1360; AVX2-NEXT: vpextrb $5, %xmm1, %edx
1361; AVX2-NEXT: andl $1, %edx
1362; AVX2-NEXT: shll $21, %edx
1363; AVX2-NEXT: orl %ecx, %edx
1364; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
1365; AVX2-NEXT: andl $1, %ecx
1366; AVX2-NEXT: shll $22, %ecx
1367; AVX2-NEXT: orl %edx, %ecx
1368; AVX2-NEXT: vpextrb $7, %xmm1, %edx
1369; AVX2-NEXT: andl $1, %edx
1370; AVX2-NEXT: shll $23, %edx
1371; AVX2-NEXT: orl %ecx, %edx
1372; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
1373; AVX2-NEXT: andl $1, %ecx
1374; AVX2-NEXT: shll $24, %ecx
1375; AVX2-NEXT: orl %edx, %ecx
1376; AVX2-NEXT: vpextrb $9, %xmm1, %edx
1377; AVX2-NEXT: andl $1, %edx
1378; AVX2-NEXT: shll $25, %edx
1379; AVX2-NEXT: orl %ecx, %edx
1380; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
1381; AVX2-NEXT: andl $1, %ecx
1382; AVX2-NEXT: shll $26, %ecx
1383; AVX2-NEXT: orl %edx, %ecx
1384; AVX2-NEXT: vpextrb $11, %xmm1, %edx
1385; AVX2-NEXT: andl $1, %edx
1386; AVX2-NEXT: shll $27, %edx
1387; AVX2-NEXT: orl %ecx, %edx
1388; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
1389; AVX2-NEXT: andl $1, %ecx
1390; AVX2-NEXT: shll $28, %ecx
1391; AVX2-NEXT: orl %edx, %ecx
1392; AVX2-NEXT: vpextrb $13, %xmm1, %edx
1393; AVX2-NEXT: andl $1, %edx
1394; AVX2-NEXT: shll $29, %edx
1395; AVX2-NEXT: orl %ecx, %edx
1396; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
1397; AVX2-NEXT: andl $1, %ecx
1398; AVX2-NEXT: shll $30, %ecx
1399; AVX2-NEXT: orl %edx, %ecx
1400; AVX2-NEXT: vpextrb $15, %xmm1, %edx
1401; AVX2-NEXT: shll $31, %edx
1402; AVX2-NEXT: orl %ecx, %edx
1403; AVX2-NEXT: orl %eax, %edx
1404; AVX2-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
1405; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
1406; AVX2-NEXT: vpextrb $1, %xmm0, %eax
1407; AVX2-NEXT: andl $1, %eax
1408; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
1409; AVX2-NEXT: andl $1, %ecx
1410; AVX2-NEXT: leal (%rcx,%rax,2), %eax
1411; AVX2-NEXT: vpextrb $2, %xmm0, %ecx
1412; AVX2-NEXT: andl $1, %ecx
1413; AVX2-NEXT: leal (%rax,%rcx,4), %eax
1414; AVX2-NEXT: vpextrb $3, %xmm0, %ecx
1415; AVX2-NEXT: andl $1, %ecx
1416; AVX2-NEXT: leal (%rax,%rcx,8), %eax
1417; AVX2-NEXT: vpextrb $4, %xmm0, %ecx
1418; AVX2-NEXT: andl $1, %ecx
1419; AVX2-NEXT: shll $4, %ecx
1420; AVX2-NEXT: orl %eax, %ecx
1421; AVX2-NEXT: vpextrb $5, %xmm0, %eax
1422; AVX2-NEXT: andl $1, %eax
1423; AVX2-NEXT: shll $5, %eax
1424; AVX2-NEXT: orl %ecx, %eax
1425; AVX2-NEXT: vpextrb $6, %xmm0, %ecx
1426; AVX2-NEXT: andl $1, %ecx
1427; AVX2-NEXT: shll $6, %ecx
1428; AVX2-NEXT: vpextrb $7, %xmm0, %edx
1429; AVX2-NEXT: andl $1, %edx
1430; AVX2-NEXT: shll $7, %edx
1431; AVX2-NEXT: orl %ecx, %edx
1432; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
1433; AVX2-NEXT: andl $1, %ecx
1434; AVX2-NEXT: shll $8, %ecx
1435; AVX2-NEXT: orl %edx, %ecx
1436; AVX2-NEXT: vpextrb $9, %xmm0, %edx
1437; AVX2-NEXT: andl $1, %edx
1438; AVX2-NEXT: shll $9, %edx
1439; AVX2-NEXT: orl %ecx, %edx
1440; AVX2-NEXT: vpextrb $10, %xmm0, %ecx
1441; AVX2-NEXT: andl $1, %ecx
1442; AVX2-NEXT: shll $10, %ecx
1443; AVX2-NEXT: orl %edx, %ecx
1444; AVX2-NEXT: vpextrb $11, %xmm0, %edx
1445; AVX2-NEXT: andl $1, %edx
1446; AVX2-NEXT: shll $11, %edx
1447; AVX2-NEXT: orl %ecx, %edx
1448; AVX2-NEXT: vpextrb $12, %xmm0, %ecx
1449; AVX2-NEXT: andl $1, %ecx
1450; AVX2-NEXT: shll $12, %ecx
1451; AVX2-NEXT: orl %edx, %ecx
1452; AVX2-NEXT: vpextrb $13, %xmm0, %edx
1453; AVX2-NEXT: andl $1, %edx
1454; AVX2-NEXT: shll $13, %edx
1455; AVX2-NEXT: orl %ecx, %edx
1456; AVX2-NEXT: vpextrb $14, %xmm0, %ecx
1457; AVX2-NEXT: andl $1, %ecx
1458; AVX2-NEXT: shll $14, %ecx
1459; AVX2-NEXT: orl %edx, %ecx
1460; AVX2-NEXT: vpextrb $15, %xmm0, %edx
1461; AVX2-NEXT: andl $1, %edx
1462; AVX2-NEXT: shll $15, %edx
1463; AVX2-NEXT: orl %ecx, %edx
1464; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1465; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
1466; AVX2-NEXT: andl $1, %ecx
1467; AVX2-NEXT: shll $16, %ecx
1468; AVX2-NEXT: orl %edx, %ecx
1469; AVX2-NEXT: vpextrb $1, %xmm0, %edx
1470; AVX2-NEXT: andl $1, %edx
1471; AVX2-NEXT: shll $17, %edx
1472; AVX2-NEXT: orl %ecx, %edx
1473; AVX2-NEXT: vpextrb $2, %xmm0, %ecx
1474; AVX2-NEXT: andl $1, %ecx
1475; AVX2-NEXT: shll $18, %ecx
1476; AVX2-NEXT: orl %edx, %ecx
1477; AVX2-NEXT: vpextrb $3, %xmm0, %edx
1478; AVX2-NEXT: andl $1, %edx
1479; AVX2-NEXT: shll $19, %edx
1480; AVX2-NEXT: orl %ecx, %edx
1481; AVX2-NEXT: vpextrb $4, %xmm0, %ecx
1482; AVX2-NEXT: andl $1, %ecx
1483; AVX2-NEXT: shll $20, %ecx
1484; AVX2-NEXT: orl %edx, %ecx
1485; AVX2-NEXT: vpextrb $5, %xmm0, %edx
1486; AVX2-NEXT: andl $1, %edx
1487; AVX2-NEXT: shll $21, %edx
1488; AVX2-NEXT: orl %ecx, %edx
1489; AVX2-NEXT: vpextrb $6, %xmm0, %ecx
1490; AVX2-NEXT: andl $1, %ecx
1491; AVX2-NEXT: shll $22, %ecx
1492; AVX2-NEXT: orl %edx, %ecx
1493; AVX2-NEXT: vpextrb $7, %xmm0, %edx
1494; AVX2-NEXT: andl $1, %edx
1495; AVX2-NEXT: shll $23, %edx
1496; AVX2-NEXT: orl %ecx, %edx
1497; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
1498; AVX2-NEXT: andl $1, %ecx
1499; AVX2-NEXT: shll $24, %ecx
1500; AVX2-NEXT: orl %edx, %ecx
1501; AVX2-NEXT: vpextrb $9, %xmm0, %edx
1502; AVX2-NEXT: andl $1, %edx
1503; AVX2-NEXT: shll $25, %edx
1504; AVX2-NEXT: orl %ecx, %edx
1505; AVX2-NEXT: vpextrb $10, %xmm0, %ecx
1506; AVX2-NEXT: andl $1, %ecx
1507; AVX2-NEXT: shll $26, %ecx
1508; AVX2-NEXT: orl %edx, %ecx
1509; AVX2-NEXT: vpextrb $11, %xmm0, %edx
1510; AVX2-NEXT: andl $1, %edx
1511; AVX2-NEXT: shll $27, %edx
1512; AVX2-NEXT: orl %ecx, %edx
1513; AVX2-NEXT: vpextrb $12, %xmm0, %ecx
1514; AVX2-NEXT: andl $1, %ecx
1515; AVX2-NEXT: shll $28, %ecx
1516; AVX2-NEXT: orl %edx, %ecx
1517; AVX2-NEXT: vpextrb $13, %xmm0, %edx
1518; AVX2-NEXT: andl $1, %edx
1519; AVX2-NEXT: shll $29, %edx
1520; AVX2-NEXT: orl %ecx, %edx
1521; AVX2-NEXT: vpextrb $14, %xmm0, %ecx
1522; AVX2-NEXT: andl $1, %ecx
1523; AVX2-NEXT: shll $30, %ecx
1524; AVX2-NEXT: orl %edx, %ecx
1525; AVX2-NEXT: vpextrb $15, %xmm0, %edx
1526; AVX2-NEXT: shll $31, %edx
1527; AVX2-NEXT: orl %ecx, %edx
1528; AVX2-NEXT: orl %eax, %edx
1529; AVX2-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
1530; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1531; AVX2-NEXT: vmovd %xmm0, %ecx
1532; AVX2-NEXT: vpextrd $1, %xmm0, %eax
1533; AVX2-NEXT: addl %ecx, %eax
1534; AVX2-NEXT: vzeroupper
1535; AVX2-NEXT: retq
Simon Pilgrim10daecb2019-04-24 17:25:45 +00001536;
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +00001537; AVX512-LABEL: bitcast_v64i8_to_v2i32:
Simon Pilgrim10daecb2019-04-24 17:25:45 +00001538; AVX512: # %bb.0:
1539; AVX512-NEXT: vpmovb2m %zmm0, %k0
1540; AVX512-NEXT: kmovq %k0, -{{[0-9]+}}(%rsp)
1541; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1542; AVX512-NEXT: vmovd %xmm0, %ecx
1543; AVX512-NEXT: vpextrd $1, %xmm0, %eax
1544; AVX512-NEXT: addl %ecx, %eax
1545; AVX512-NEXT: vzeroupper
1546; AVX512-NEXT: retq
; Reference IR: sign compare -> <64 x i1> mask -> <2 x i32>, add the halves.
1547 %1 = icmp slt <64 x i8> %a0, zeroinitializer
1548 %2 = bitcast <64 x i1> %1 to <2 x i32>
1549 %3 = extractelement <2 x i32> %2, i32 0
1550 %4 = extractelement <2 x i32> %2, i32 1
1551 %5 = add i32 %3, %4
1552 ret i32 %5
1553}