; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; Masking element 0 with 15 clears its sign bit, so the sext of the extracted
; i16 is known non-negative and llc drops the explicit sign-extension.
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpextrw $0, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpextrw $0, %xmm0, %eax
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

; Masking lane 0 to 16 bits proves the extracted i64 is small/non-negative,
; so uitofp can be lowered as a cheap signed conversion.
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X32-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fstps {{[0-9]+}}(%esp)
; X32-NEXT:    flds {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

; The upper four lanes are masked to 4 bits, so the sext of the shuffled
; halves becomes a simple zero-extension (punpckhwd with zero).
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Known bits must survive through a chain of two shuffles; the sext still
; simplifies to a zero-extension of the masked high lanes.
define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

; Same as above but the first shuffle leaves lane 3 undef, so known-bits
; cannot prove non-negativity and a real vpmovsxwd is still emitted.
define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

; All shuffled lanes are masked below 2^31, so uitofp lowers to the signed
; vcvtdq2ps instead of the expensive unsigned expansion.
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

; Known bits propagate through OR: the result stays below 2^31, keeping the
; cheap signed vcvtdq2ps lowering for the unsigned conversion.
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

158define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
159; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
160; X32: # BB#0:
161; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
162; X32-NEXT: vpxor {{\.LCPI.*}}, %xmm0, %xmm0
163; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
Simon Pilgrim39df78e2016-11-06 16:49:19 +0000164; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
Simon Pilgrim3ac353c2016-11-06 16:36:29 +0000165; X32-NEXT: retl
166;
167; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
168; X64: # BB#0:
169; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
170; X64-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
171; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
Simon Pilgrim39df78e2016-11-06 16:49:19 +0000172; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
Simon Pilgrim3ac353c2016-11-06 16:36:29 +0000173; X64-NEXT: retq
174 %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
175 %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
176 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
177 %4 = uitofp <4 x i32> %3 to <4 x float>
178 ret <4 x float> %4
179}
Simon Pilgrimede8ad72016-11-10 13:34:17 +0000180
; mask+shl leaves no bits at positions >= 15 in lanes 0/3, so the final
; lshr by 15 is known to produce zero and the whole function folds away.
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

; mask+ashr clears the top 30 bits of lanes 0/3, so the lshr by 30 is known
; to be zero in every shuffled lane and the result folds to zero.
define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

; Known low zero bits of the mask survive the multiply, so the shl by 22
; pushes all possibly-set bits out of the word and the result is zero.
define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; Known zero bits must propagate through trunc; the shl by 22 then shifts
; every possibly-set bit out of lanes 0/3, folding the result to zero.
define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; Both addends in lanes 0/3 are < 2^15, so their sum is < 2^16 and the lshr
; by 17 is known zero — the whole computation folds to zero.
define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

; 255 - (x & 15) fits in 8 bits for lanes 0/3, so shifting right by 22 is
; known to yield zero.
define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; udiv never increases the dividend, so lanes 0/3 stay < 2^15 and the lshr
; by 22 is known zero.
define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; urem by 16 leaves at most 4 low bits set, so the lshr by 22 is known zero.
define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

; The remainder is no larger than the masked dividend (< 2^15 in lanes 0/3),
; so the lshr by 22 is known zero.
define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

; srem of a multiple of 2^15 by 16 leaves only low bits clear of the range
; the lshr by 22 would expose, so the result is known zero.
define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; bswap swaps known-zero bytes into the low half; the subsequent shl by 22
; then clears everything, folding the result to zero.
define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

369define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
370; X32-LABEL: knownbits_mask_concat_uitofp:
371; X32: # BB#0:
372; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
373; X32-NEXT: vpand {{\.LCPI.*}}, %xmm1, %xmm1
374; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
375; X32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
Simon Pilgrim3a5328e2016-11-18 21:59:38 +0000376; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
377; X32-NEXT: vcvtdq2ps %ymm0, %ymm0
Simon Pilgrim3a5328e2016-11-18 21:59:38 +0000378; X32-NEXT: retl
379;
380; X64-LABEL: knownbits_mask_concat_uitofp:
381; X64: # BB#0:
382; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
383; X64-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
384; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
385; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
Simon Pilgrim3a5328e2016-11-18 21:59:38 +0000386; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
387; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
Simon Pilgrim3a5328e2016-11-18 21:59:38 +0000388; X64-NEXT: retq
389 %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
390 %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
391 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
392 %4 = uitofp <8 x i32> %3 to <8 x float>
393 ret <8 x float> %4
394}
Simon Pilgrim84b6f262016-11-25 15:07:15 +0000395
396define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
397; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
398; X32: # BB#0:
399; X32-NEXT: vpsrlq $1, %xmm0, %xmm0
400; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
401; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
402; X32-NEXT: vpsrld $16, %xmm0, %xmm0
403; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
404; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
405; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
406; X32-NEXT: retl
407;
408; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
409; X64: # BB#0:
410; X64-NEXT: vpsrlq $1, %xmm0, %xmm0
411; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
412; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
413; X64-NEXT: vpsrld $16, %xmm0, %xmm0
414; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
415; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
416; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
417; X64-NEXT: retq
418 %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
419 %2 = bitcast <2 x i64> %1 to <4 x i32>
420 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
421 %4 = uitofp <4 x i32> %3 to <4 x float>
422 ret <4 x float> %4
423}
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000424
; Signed min/max clamps don't prove the selected lanes are non-negative, so
; the expensive unsigned-conversion expansion is still required.
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

; Unsigned clamps bound the lanes to at most 2^18-1 (< 2^31), yet the
; unsigned-conversion expansion is still emitted here (the CHECK lines record
; the current lowering, not necessarily the optimal one).
define <4 x float> @knownbits_umax_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_umin_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpminud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umax_umin_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone