; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

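; Sum the two i1 lanes of a v2i64 sign mask; the CHECK lines show both bits
; coming from a single (v)movmskpd feeding the scalar add.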
define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %ecx
; SSE2-SSSE3-NEXT:    movl %ecx, %eax
; SSE2-SSSE3-NEXT:    shrb %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v2i64_to_v2i1:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %xmm0, %ecx
; AVX12-NEXT:    movl %ecx, %eax
; AVX12-NEXT:    shrb %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v2i64_to_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; AVX512-NEXT:    kshiftrw $1, %k0, %k1
; AVX512-NEXT:    kmovd %k1, %ecx
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <2 x i64> %a0, zeroinitializer
  %2 = bitcast <2 x i1> %1 to <2 x i1>
  %3 = extractelement <2 x i1> %2, i32 0
  %4 = extractelement <2 x i1> %2, i32 1
  %5 = add i1 %3, %4
  ret i1 %5
}

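; The 4-bit (v)movmskps result is split into two i2 halves (andl $3 for the
; low half, shrl $2 for the high half) which are then added.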
define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i32_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    andl $3, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    shrl $2, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v4i32_to_v2i2:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrl $2, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $3, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v4i32_to_v2i2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $2, %ecx
; AVX512-NEXT:    andl $3, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $3, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

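; v8i16 is narrowed with (v)packsswb first, so (v)pmovmskb yields 8 bits that
; split into two i4 halves (andl $15 / shrl $4).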
define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i16_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    movzbl %al, %ecx
; AVX12-NEXT:    shrl $4, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $15, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %xmm0, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

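; The 16-bit (v)pmovmskb result is already the whole <2 x i8> payload; the
; test adds its low and high bytes.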
153define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
Craig Topper3de33242019-08-05 18:25:36 +0000154; SSE2-SSSE3-LABEL: bitcast_v16i8_to_v2i8:
155; SSE2-SSSE3: # %bb.0:
156; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
157; SSE2-SSSE3-NEXT: movd %eax, %xmm0
158; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
159; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
160; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
161; SSE2-SSSE3-NEXT: retq
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000162;
163; AVX12-LABEL: bitcast_v16i8_to_v2i8:
164; AVX12: # %bb.0:
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000165; AVX12-NEXT: vpmovmskb %xmm0, %eax
166; AVX12-NEXT: vmovd %eax, %xmm0
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000167; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
168; AVX12-NEXT: vpextrb $1, %xmm0, %eax
169; AVX12-NEXT: addb %cl, %al
170; AVX12-NEXT: # kill: def $al killed $al killed $eax
171; AVX12-NEXT: retq
172;
173; AVX512-LABEL: bitcast_v16i8_to_v2i8:
174; AVX512: # %bb.0:
175; AVX512-NEXT: vpmovb2m %xmm0, %k0
176; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
Craig Topper3de33242019-08-05 18:25:36 +0000177; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000178; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
179; AVX512-NEXT: vpextrb $1, %xmm0, %eax
180; AVX512-NEXT: addb %cl, %al
181; AVX512-NEXT: # kill: def $al killed $al killed $eax
182; AVX512-NEXT: retq
183 %1 = icmp slt <16 x i8> %a0, zeroinitializer
184 %2 = bitcast <16 x i1> %1 to <2 x i8>
185 %3 = extractelement <2 x i8> %2, i32 0
186 %4 = extractelement <2 x i8> %2, i32 1
187 %5 = add i8 %3, %4
188 ret i8 %5
189}
190
191;
192; 256-bit vectors
193;
194
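; As above but from a 256-bit input: SSE packs the two halves before
; movmskps, while AVX reads all four sign bits with vmovmskpd on the ymm.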
define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrl $2, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $3, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v4i64_to_v2i2:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %ymm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrl $2, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $3, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v4i64_to_v2i2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $2, %ecx
; AVX512-NEXT:    andl $3, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $3, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <4 x i64> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

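; Eight sign bits from vmovmskps %ymm0 (or an AVX512 kmask), split into two
; i4 halves and added.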
define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i32_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i32_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskps %ymm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrl $4, %ecx
; AVX12-NEXT:    vmovd %ecx, %xmm0
; AVX12-NEXT:    andl $15, %eax
; AVX12-NEXT:    vmovd %eax, %xmm1
; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i32_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

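; The v16i16 mask is packed to bytes so a single (v)pmovmskb produces the two
; i8 lanes directly.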
define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v16i16_to_v2i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i16_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i16_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i16_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %ymm0, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i16> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

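; Two 16-bit pmovmskb masks (or one 32-bit vpmovmskb / kmask) are combined;
; the two i16 lanes are the low and high words of that mask.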
define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT:    shll $16, %eax
; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
; SSE2-SSSE3-NEXT:    pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm0
; AVX2-NEXT:    vpextrw $1, %xmm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    pushq %rbp
; AVX512-NEXT:    movq %rsp, %rbp
; AVX512-NEXT:    andq $-32, %rsp
; AVX512-NEXT:    subq $32, %rsp
; AVX512-NEXT:    vpmovb2m %ymm0, %k0
; AVX512-NEXT:    kmovd %k0, (%rsp)
; AVX512-NEXT:    vmovdqa (%rsp), %xmm0
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    movq %rbp, %rsp
; AVX512-NEXT:    popq %rbp
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i8> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

;
; 512-bit vectors
;

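; SSE2 has no 64-bit signed compare, so the v8i64 case emulates it with
; pcmpgtd/pcmpeqd before packing the eight sign bits; AVX512 compares the
; whole zmm register into a kmask.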
define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i64_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm7
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm7, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT:    movzbl %al, %ecx
; SSE2-SSSE3-NEXT:    shrl $4, %ecx
; SSE2-SSSE3-NEXT:    movq %rcx, %xmm0
; SSE2-SSSE3-NEXT:    andl $15, %eax
; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v8i64_to_v2i4:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $4, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v8i64_to_v2i4:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $4, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm0
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i64_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movzbl %al, %ecx
; AVX512-NEXT:    shrl $4, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    andl $15, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i64> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

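; Sixteen sign bits gathered by packing to bytes plus (v)pmovmskb, or by
; spilling an AVX512 kmask to the stack.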
550define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
Craig Topper3de33242019-08-05 18:25:36 +0000551; SSE2-SSSE3-LABEL: bitcast_v16i32_to_v2i8:
552; SSE2-SSSE3: # %bb.0:
553; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
554; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
555; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm0
556; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
557; SSE2-SSSE3-NEXT: movd %eax, %xmm0
558; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
559; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
560; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
561; SSE2-SSSE3-NEXT: retq
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000562;
563; AVX1-LABEL: bitcast_v16i32_to_v2i8:
564; AVX1: # %bb.0:
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000565; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
566; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
567; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
568; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
569; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
570; AVX1-NEXT: vpmovmskb %xmm0, %eax
571; AVX1-NEXT: vmovd %eax, %xmm0
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000572; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
573; AVX1-NEXT: vpextrb $1, %xmm0, %eax
574; AVX1-NEXT: addb %cl, %al
575; AVX1-NEXT: # kill: def $al killed $al killed $eax
576; AVX1-NEXT: vzeroupper
577; AVX1-NEXT: retq
578;
579; AVX2-LABEL: bitcast_v16i32_to_v2i8:
580; AVX2: # %bb.0:
581; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000582; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000583; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
Simon Pilgrim0a7d1b32019-04-25 09:34:36 +0000584; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
585; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
586; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
587; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
588; AVX2-NEXT: vpmovmskb %xmm0, %eax
589; AVX2-NEXT: vmovd %eax, %xmm0
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000590; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
591; AVX2-NEXT: vpextrb $1, %xmm0, %eax
592; AVX2-NEXT: addb %cl, %al
593; AVX2-NEXT: # kill: def $al killed $al killed $eax
594; AVX2-NEXT: vzeroupper
595; AVX2-NEXT: retq
596;
597; AVX512-LABEL: bitcast_v16i32_to_v2i8:
598; AVX512: # %bb.0:
599; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
600; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
601; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
Craig Topper3de33242019-08-05 18:25:36 +0000602; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
Simon Pilgrim10daecb2019-04-24 17:25:45 +0000603; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
604; AVX512-NEXT: vpextrb $1, %xmm0, %eax
605; AVX512-NEXT: addb %cl, %al
606; AVX512-NEXT: # kill: def $al killed $al killed $eax
607; AVX512-NEXT: vzeroupper
608; AVX512-NEXT: retq
609 %1 = icmp slt <16 x i32> %a0, zeroinitializer
610 %2 = bitcast <16 x i1> %1 to <2 x i8>
611 %3 = extractelement <2 x i8> %2, i32 0
612 %4 = extractelement <2 x i8> %2, i32 1
613 %5 = add i8 %3, %4
614 ret i8 %5
615}
616
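; Thirty-two mask bits built from two packed pmovmskb results (SSE/AVX1),
; one ymm vpmovmskb (AVX2), or a vpmovw2m kmask (AVX512).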
define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT:    packsswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %eax
; SSE2-SSSE3-NEXT:    shll $16, %eax
; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
; SSE2-SSSE3-NEXT:    pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i16_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i16_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm0
; AVX2-NEXT:    vpextrw $1, %xmm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i16_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    pushq %rbp
; AVX512-NEXT:    movq %rsp, %rbp
; AVX512-NEXT:    andq $-32, %rsp
; AVX512-NEXT:    subq $32, %rsp
; AVX512-NEXT:    vpmovw2m %zmm0, %k0
; AVX512-NEXT:    kmovd %k0, (%rsp)
; AVX512-NEXT:    vmovdqa (%rsp), %xmm0
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    movq %rbp, %rsp
; AVX512-NEXT:    popq %rbp
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i16> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

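; Worst case: without vpmovb2m the 64 mask bits are assembled bit-by-bit via
; movzbl/vpextrb + shll + orl, which is why the pre-AVX512 output below is so
; large.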
define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSE2-SSSE3-NEXT:    pxor %xmm5, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    movdqa %xmm5, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    pxor %xmm3, %xmm3
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    movdqa %xmm3, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    movdqa %xmm2, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    movdqa %xmm4, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $4, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    shll $5, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $6, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $7, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $8, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $9, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $10, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $11, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $12, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $13, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $14, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    shll $15, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $4, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    shll $5, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $6, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $7, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $8, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $9, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $10, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $11, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $12, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $13, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $14, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    shll $15, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $4, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    shll $5, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $6, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $7, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $8, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $9, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $10, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $11, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $12, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $13, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $14, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    shll $15, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rcx,%rax,2), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,4), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    leal (%rax,%rcx,8), %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $4, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    andl $1, %eax
; SSE2-SSSE3-NEXT:    shll $5, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $6, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $7, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $8, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $9, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $10, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $11, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $12, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    andl $1, %edx
; SSE2-SSSE3-NEXT:    shll $13, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT:    andl $1, %ecx
; SSE2-SSSE3-NEXT:    shll $14, %ecx
; SSE2-SSSE3-NEXT:    orl %edx, %ecx
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-SSSE3-NEXT:    shll $15, %edx
; SSE2-SSSE3-NEXT:    orl %ecx, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movdqa -{{[0-9]+}}(%rsp), %xmm0
; SSE2-SSSE3-NEXT:    movd %xmm0, %ecx
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-SSSE3-NEXT:    movd %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v64i8_to_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm3
; AVX1-NEXT:    vpextrb $1, %xmm3, %eax
; AVX1-NEXT:    andl $1, %eax
; AVX1-NEXT:    vpextrb $0, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rcx,%rax,2), %eax
; AVX1-NEXT:    vpextrb $2, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rax,%rcx,4), %eax
; AVX1-NEXT:    vpextrb $3, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rax,%rcx,8), %eax
; AVX1-NEXT:    vpextrb $4, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $4, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpextrb $5, %xmm3, %eax
; AVX1-NEXT:    andl $1, %eax
; AVX1-NEXT:    shll $5, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vpextrb $6, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $6, %ecx
; AVX1-NEXT:    vpextrb $7, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $7, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $8, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $8, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $9, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $9, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $10, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $10, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $11, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $11, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $12, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $12, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $13, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $13, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $14, %xmm3, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $14, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $15, %xmm3, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $15, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $17, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $2, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $18, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $3, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $19, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $4, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $20, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $5, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $21, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $6, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $22, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $7, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $23, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $8, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $24, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $9, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $25, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $10, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $26, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $11, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $27, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $12, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $28, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $13, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $29, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $14, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $30, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $15, %xmm1, %edx
; AVX1-NEXT:    shll $31, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm1
; AVX1-NEXT:    vpextrb $1, %xmm1, %eax
; AVX1-NEXT:    andl $1, %eax
; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rcx,%rax,2), %eax
; AVX1-NEXT:    vpextrb $2, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rax,%rcx,4), %eax
; AVX1-NEXT:    vpextrb $3, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    leal (%rax,%rcx,8), %eax
; AVX1-NEXT:    vpextrb $4, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $4, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpextrb $5, %xmm1, %eax
; AVX1-NEXT:    andl $1, %eax
; AVX1-NEXT:    shll $5, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vpextrb $6, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $6, %ecx
; AVX1-NEXT:    vpextrb $7, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $7, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $8, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $8, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $9, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $9, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $10, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $10, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $11, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $11, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $12, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $12, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $13, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $13, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $14, %xmm1, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $14, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $15, %xmm1, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $15, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $1, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $17, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $2, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $18, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $3, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $19, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $4, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $20, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $5, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $21, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $6, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $22, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $7, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $23, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $8, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $24, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $9, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $25, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $10, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $26, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $11, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $27, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $12, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $28, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $13, %xmm0, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    shll $29, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    vpextrb $14, %xmm0, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    shll $30, %ecx
; AVX1-NEXT:    orl %edx, %ecx
; AVX1-NEXT:    vpextrb $15, %xmm0, %edx
; AVX1-NEXT:    shll $31, %edx
; AVX1-NEXT:    orl %ecx, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX1-NEXT:    vmovd %xmm0, %ecx
; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v64i8_to_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
; AVX2-NEXT:    andl $1, %eax
; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rcx,%rax,2), %eax
; AVX2-NEXT:    vpextrb $2, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rax,%rcx,4), %eax
; AVX2-NEXT:    vpextrb $3, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rax,%rcx,8), %eax
; AVX2-NEXT:    vpextrb $4, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $4, %ecx
; AVX2-NEXT:    orl %eax, %ecx
; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
; AVX2-NEXT:    andl $1, %eax
; AVX2-NEXT:    shll $5, %eax
; AVX2-NEXT:    orl %ecx, %eax
; AVX2-NEXT:    vpextrb $6, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $6, %ecx
; AVX2-NEXT:    vpextrb $7, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $7, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $8, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $9, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $9, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $10, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $10, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $11, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $11, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $12, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $12, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $13, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $13, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $14, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $14, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $15, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $15, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $16, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $17, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $2, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $18, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $3, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $19, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $4, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $20, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $5, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $21, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $6, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $22, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $7, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $23, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $24, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $9, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $25, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $10, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $26, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $11, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $27, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $12, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $28, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $13, %xmm1, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $29, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $14, %xmm1, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $30, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $15, %xmm1, %edx
; AVX2-NEXT:    shll $31, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    orl %eax, %edx
; AVX2-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    andl $1, %eax
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rcx,%rax,2), %eax
; AVX2-NEXT:    vpextrb $2, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rax,%rcx,4), %eax
; AVX2-NEXT:    vpextrb $3, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    leal (%rax,%rcx,8), %eax
; AVX2-NEXT:    vpextrb $4, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $4, %ecx
; AVX2-NEXT:    orl %eax, %ecx
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    andl $1, %eax
; AVX2-NEXT:    shll $5, %eax
; AVX2-NEXT:    orl %ecx, %eax
; AVX2-NEXT:    vpextrb $6, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $6, %ecx
; AVX2-NEXT:    vpextrb $7, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $7, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $8, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $9, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $9, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $10, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $10, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $11, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $11, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $12, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $12, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $13, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $13, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $14, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $14, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $15, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $15, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $16, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $1, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $17, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $2, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $18, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $3, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $19, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $4, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $20, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $5, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $21, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $6, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $22, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $7, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $23, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $24, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $9, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $25, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $10, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $26, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $11, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $27, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $12, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $28, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $13, %xmm0, %edx
; AVX2-NEXT:    andl $1, %edx
; AVX2-NEXT:    shll $29, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    vpextrb $14, %xmm0, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    shll $30, %ecx
; AVX2-NEXT:    orl %edx, %ecx
; AVX2-NEXT:    vpextrb $15, %xmm0, %edx
; AVX2-NEXT:    shll $31, %edx
; AVX2-NEXT:    orl %ecx, %edx
; AVX2-NEXT:    orl %eax, %edx
; AVX2-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX2-NEXT:    vmovd %xmm0, %ecx
; AVX2-NEXT:    vpextrd $1, %xmm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v64i8_to_v2i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %zmm0, %k0
; AVX512-NEXT:    kmovq %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrd $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <64 x i8> %a0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to <2 x i32>
  %3 = extractelement <2 x i32> %2, i32 0
  %4 = extractelement <2 x i32> %2, i32 1
  %5 = add i32 %3, %4
  ret i32 %5
}