; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; Lane 0 is masked down to 4 bits, so the extracted value is known non-negative
; and the checks expect no explicit sign-extend after the vpextrw.
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpextrw $0, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpextrw $0, %xmm0, %eax
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

; Lane 0 is masked to 16 bits, so the i64 uitofp is known in-range for a
; signed convert (vcvtsi2ssq on x64, fildll on x86).
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X32-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fstps {{[0-9]+}}(%esp)
; X32-NEXT:    flds {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

; The inserted elements are zero-extended i16s, so all shuffled lanes are
; known < 2^16 and the uitofp can be lowered as a signed vcvtdq2ps.
define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X32-LABEL: knownbits_insert_uitofp:
; X32:       # BB#0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vpinsrd $0, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64:       # BB#0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    movzwl %si, %ecx
; X64-NEXT:    vpinsrd $0, %eax, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32> %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

; The shuffled high lanes are masked to 4 bits, so the sext can be done by
; unpacking against zero (vpunpckhwd) instead of a real sign-extend.
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Same as knownbits_mask_shuffle_sext, but through a chain of two shuffles -
; known bits must survive the shuffle-of-shuffle simplification.
define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

; Here the first shuffle leaves lane 3 undef, so the zero-unpack fold does
; not fire and a real vpmovsxwd is still expected.
define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

; The shuffled lanes (2,3) are masked to small non-negative values, so the
; uitofp can be lowered with the signed vcvtdq2ps.
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

; Known zero bits of the masked lanes survive the OR, so the uitofp still
; lowers to vcvtdq2ps.
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; Known zero bits of the masked lanes survive the XOR, so the uitofp still
; lowers to vcvtdq2ps.
define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; After the mask, shl 17 and lshr 15 no set bits can remain in the selected
; lanes, so the whole computation folds to zero.
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

; Lanes 0/3 are masked to 17 bits, so after ashr 15 and lshr 30 the selected
; lanes are known zero and the result folds to vxorps.
define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

; The selected lanes keep 16 known-zero low bits through the multiply, so
; shl 22 shifts out every possibly-set bit and the result is zero.
define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; Known bits must be tracked through the trunc: the selected lanes' low bits
; are zero, so shl 22 leaves nothing and the result folds to zero.
define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; Both addends' selected lanes are < 2^15, so the sum is < 2^16 and lshr 17
; is known zero - everything folds to vxorps.
define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

; 255 - (x & 15) fits in 8 bits for the selected lanes, so lshr 22 is known
; zero and the result folds to vxorps.
define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; udiv can only shrink the masked (< 2^15) lanes, so lshr 22 of the selected
; lanes is known zero and the whole thing folds away.
define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; urem by 16 leaves at most 4 set bits, so lshr 22 is known zero.
define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

; The urem result is bounded by the (< 2^15) masked operands, so lshr 22 of
; the selected lanes is known zero.
define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

; Lanes 0/3 have their low 15 bits masked to zero, so they are multiples of
; 16 and the srem (and hence the lshr) is known zero.
define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; bswap of the masked lanes leaves the low bits zero, so shl 22 shifts out
; every possibly-set bit and the result folds to vxorps.
define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

; Every selected lane of the concatenated shuffle is masked below 2^17, so
; the 256-bit uitofp can use the signed vcvtdq2ps.
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm1, %xmm1
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

; The odd i32 lanes are the high halves of (u64 >> 1), so their sign bit is
; known zero and the uitofp can use vcvtdq2ps through the bitcast.
define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; NOTE(review): the smin/smax clamp here does not prove the sign bit clear
; for all selected lanes, so the checks still expect the full unsigned
; uitofp expansion (blend/psrld/addps) rather than a plain vcvtdq2ps.
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

; The umin bounds the selected lanes below 2^18, so the uitofp lowers to the
; signed vcvtdq2ps.
define <4 x float> @knownbits_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umin_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpminud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umin_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

; The umax forces lanes 1/2 to all-ones, so the ashr by 31 of the shuffled
; (1,1,2,2) lanes is a no-op and is expected to fold away.
define <4 x i32> @knownbits_umax_shuffle_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_shuffle_ashr:
; X32:       # BB#0:
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umax_shuffle_ashr:
; X64:       # BB#0:
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}

; Mask and umax together bound the selected lanes below 2^18, so the uitofp
; lowers to the signed vcvtdq2ps.
define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; Masking with -2 clears bit 0; after bitreverse that becomes the sign bit,
; so ashr 31 is known zero and the result folds to vxorps.
define <4 x i32> @knownbits_mask_bitreverse_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_bitreverse_ashr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bitreverse_ashr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 -2, i32 -2>
  %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %1)
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) nounwind readnone