; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512
; Bitcast a 2-bit scalar mask to a <2 x i1> vector. SSE/AVX lower this via a
; stack round-trip plus per-bit extraction; AVX512 uses a mask register (k1).
define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i2_2i1:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movq %rcx, %xmm0
; SSE2-SSSE3-NEXT: shrl %eax
; SSE2-SSSE3-NEXT: andl $1, %eax
; SSE2-SSSE3-NEXT: movq %rax, %xmm1
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_i2_2i1:
; AVX12: # BB#0:
; AVX12-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX12-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vmovq %rcx, %xmm0
; AVX12-NEXT: shrl %eax
; AVX12-NEXT: andl $1, %eax
; AVX12-NEXT: vmovq %rax, %xmm1
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_i2_2i1:
; AVX512: # BB#0:
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  ret <2 x i1> %1
}

; Bitcast a 4-bit scalar mask to a <4 x i1> vector (i32 elements pre-legalized).
define <4 x i1> @bitcast_i4_4i1(i4 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i4_4i1:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: shrl %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm2
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_i4_4i1:
; AVX1: # BB#0:
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl %ecx
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $2, %ecx
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; AVX1-NEXT: shrl $3, %eax
; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_i4_4i1:
; AVX2: # BB#0:
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl %ecx
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $2, %ecx
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; AVX2-NEXT: shrl $3, %eax
; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_i4_4i1:
; AVX512: # BB#0:
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  ret <4 x i1> %1
}

; Bitcast an 8-bit scalar mask to a <8 x i1> vector. AVX512BW can do this
; directly with kmovd + vpmovm2w.
define <8 x i1> @bitcast_i8_8i1(i8 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i8_8i1:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $5, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $6, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: shrl $7, %eax
; SSE2-SSSE3-NEXT: movzwl %ax, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm3
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_i8_8i1:
; AVX12: # BB#0:
; AVX12-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX12-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: movl %eax, %edx
; AVX12-NEXT: andl $1, %edx
; AVX12-NEXT: vmovd %edx, %xmm0
; AVX12-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $2, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $3, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $4, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $5, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $6, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; AVX12-NEXT: shrl $7, %eax
; AVX12-NEXT: movzwl %ax, %eax
; AVX12-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_i8_8i1:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  ret <8 x i1> %1
}

; Bitcast a 16-bit scalar mask to a <16 x i1> vector. AVX512BW uses
; kmovd + vpmovm2b; older targets extract each bit individually.
define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i16_16i1:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $7, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $6, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $5, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $11, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $10, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $9, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $8, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $13, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $12, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $14, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: shrl $15, %eax
; SSE2-SSSE3-NEXT: movzwl %ax, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm4
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_i16_16i1:
; AVX12: # BB#0:
; AVX12-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; AVX12-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: movl %eax, %edx
; AVX12-NEXT: andl $1, %edx
; AVX12-NEXT: vmovd %edx, %xmm0
; AVX12-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $2, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $3, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $4, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $5, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $6, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $7, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $8, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $9, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $10, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $11, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $12, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $13, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $14, %ecx
; AVX12-NEXT: andl $1, %ecx
; AVX12-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX12-NEXT: shrl $15, %eax
; AVX12-NEXT: movzwl %ax, %eax
; AVX12-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_i16_16i1:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  ret <16 x i1> %1
}

; Bitcast a 32-bit scalar mask to a <32 x i1> vector. SSE returns this
; indirectly via sret-style memory (%rdi); AVX1/AVX2 build a 256-bit vector
; bit by bit; AVX512BW uses kmovd + vpmovm2b.
define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; SSE2-SSSE3-LABEL: bitcast_i32_32i1:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movl %esi, (%rdi)
; SSE2-SSSE3-NEXT: movq %rdi, %rax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_i32_32i1:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: .Lcfi0:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: .Lcfi1:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: .Lcfi2:
; AVX1-NEXT: .cfi_def_cfa_register %rbp
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $32, %rsp
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $17, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movl %edi, %ecx
; AVX1-NEXT: shrl $16, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $18, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $19, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $20, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $21, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $22, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $23, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $24, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $25, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $26, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $27, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $28, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $29, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $30, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $31, %eax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movl %edi, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm1
; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $2, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $3, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $4, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $5, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $6, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $7, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $9, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $10, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $11, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $12, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $13, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: shrl $14, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX1-NEXT: shrl $15, %edi
; AVX1-NEXT: andl $1, %edi
; AVX1-NEXT: vpinsrb $15, %edi, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_i32_32i1:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: .Lcfi0:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: .Lcfi1:
; AVX2-NEXT: .cfi_offset %rbp, -16
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: .Lcfi2:
; AVX2-NEXT: .cfi_def_cfa_register %rbp
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $32, %rsp
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $17, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movl %edi, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $18, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $19, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $20, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $21, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $22, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $23, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $24, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $25, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $26, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $27, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $28, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $29, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $30, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $31, %eax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movl %edi, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm1
; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $2, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $3, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $4, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $5, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $6, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $7, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $9, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $10, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $11, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $12, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $13, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrl $14, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX2-NEXT: shrl $15, %edi
; AVX2-NEXT: andl $1, %edi
; AVX2-NEXT: vpinsrb $15, %edi, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_i32_32i1:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  ret <32 x i1> %1
}

; Bitcast a 64-bit scalar mask to a <64 x i1> vector. Pre-AVX512 targets
; return via memory (%rdi); AVX512BW uses kmovq + vpmovm2b into a zmm.
define <64 x i1> @bitcast_i64_64i1(i64 %a0) {
; SSE2-SSSE3-LABEL: bitcast_i64_64i1:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movq %rsi, (%rdi)
; SSE2-SSSE3-NEXT: movq %rdi, %rax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_i64_64i1:
; AVX12: # BB#0:
; AVX12-NEXT: movq %rsi, (%rdi)
; AVX12-NEXT: movq %rdi, %rax
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_i64_64i1:
; AVX512: # BB#0:
; AVX512-NEXT: kmovq %rdi, %k0
; AVX512-NEXT: vpmovm2b %k0, %zmm0
; AVX512-NEXT: retq
  %1 = bitcast i64 %a0 to <64 x i1>
  ret <64 x i1> %1
}