blob: e5e4fcf833875f6cb6375e6b35f61fdb2d3885d7 [file] [log] [blame]
Simon Pilgrim730f83a2016-10-15 19:29:26 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
4
; Masks element 0 of %a0 down to its low 4 bits (and with 15), extracts that
; element and sign-extends it from i16 to i32.
; The expected code for both targets is just vpand + vpextrw $0 + ret; no
; separate sign-extension instruction appears in the checked output.
5define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
6; X32-LABEL: knownbits_mask_extract_sext:
7; X32: # BB#0:
Simon Pilgrim75a697a2016-10-29 11:29:39 +00008; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
9; X32-NEXT: vpextrw $0, %xmm0, %eax
Simon Pilgrim730f83a2016-10-15 19:29:26 +000010; X32-NEXT: retl
11;
12; X64-LABEL: knownbits_mask_extract_sext:
13; X64: # BB#0:
Simon Pilgrim75a697a2016-10-29 11:29:39 +000014; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
15; X64-NEXT: vpextrw $0, %xmm0, %eax
Simon Pilgrim730f83a2016-10-15 19:29:26 +000016; X64-NEXT: retq
17 %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
18 %2 = extractelement <8 x i16> %1, i32 0
19 %3 = sext i16 %2 to i32
20 ret i32 %3
21}
22
; Masks element 0 of %a0 to its low 16 bits (and with 65535), extracts it as an
; i64 and converts it to float with uitofp.
; Expected code zeroes words 1-3 of the low lane (vpxor + vpblendw) and then
; converts: the i686 target spills to an aligned stack slot and goes through
; fildll/fstps/flds, the x86_64 target uses vmovq + vcvtsi2ssq.
23define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
24; X32-LABEL: knownbits_mask_extract_uitofp:
25; X32: # BB#0:
26; X32-NEXT: pushl %ebp
27; X32-NEXT: movl %esp, %ebp
28; X32-NEXT: andl $-8, %esp
29; X32-NEXT: subl $16, %esp
30; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
31; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
Simon Pilgrim730f83a2016-10-15 19:29:26 +000032; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
Simon Pilgrim730f83a2016-10-15 19:29:26 +000033; X32-NEXT: fildll {{[0-9]+}}(%esp)
Simon Pilgrim730f83a2016-10-15 19:29:26 +000034; X32-NEXT: fstps {{[0-9]+}}(%esp)
Simon Pilgrim75a697a2016-10-29 11:29:39 +000035; X32-NEXT: flds {{[0-9]+}}(%esp)
Simon Pilgrim730f83a2016-10-15 19:29:26 +000036; X32-NEXT: movl %ebp, %esp
37; X32-NEXT: popl %ebp
38; X32-NEXT: retl
39;
40; X64-LABEL: knownbits_mask_extract_uitofp:
41; X64: # BB#0:
42; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
43; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
44; X64-NEXT: vmovq %xmm0, %rax
Simon Pilgrim730f83a2016-10-15 19:29:26 +000045; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
46; X64-NEXT: retq
Simon Pilgrim730f83a2016-10-15 19:29:26 +000047 %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
48 %2 = extractelement <2 x i64> %1, i32 0
49 %3 = uitofp i64 %2 to float
50 ret float %3
51}
52
; Masks elements 4-7 of %a0 to their low 4 bits (and with 15), selects those
; four elements with a shufflevector and sign-extends them to <4 x i32>.
; Expected code: vpand, then vpxor to build a zero register and vpunpckhwd to
; interleave the high four words with zero; no sign-extend instruction is
; expected.
53define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
54; X32-LABEL: knownbits_mask_shuffle_sext:
55; X32: # BB#0:
56; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
Simon Pilgrim75a697a2016-10-29 11:29:39 +000057; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
58; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
Simon Pilgrim730f83a2016-10-15 19:29:26 +000059; X32-NEXT: retl
60;
61; X64-LABEL: knownbits_mask_shuffle_sext:
62; X64: # BB#0:
63; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
Simon Pilgrim75a697a2016-10-29 11:29:39 +000064; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
65; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
Simon Pilgrim730f83a2016-10-15 19:29:26 +000066; X64-NEXT: retq
67 %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
68 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
69 %3 = sext <4 x i16> %2 to <4 x i32>
70 ret <4 x i32> %3
71}
72
; Same mask as knownbits_mask_shuffle_sext (elements 4-7 and'ed with 15), but
; the masked elements reach the sext through two chained shufflevectors: the
; first moves elements 4-7 into the low half of an 8-element vector (upper half
; undef), the second takes the low four elements.
; Expected code: vpand, vpshufd [2,3,0,1], vpmovsxwd.
; NOTE(review): the checks still contain an explicit vpmovsxwd here; presumably
; the masked bits are not tracked through the shuffle chain at this revision --
; confirm against the commit that added this test.
Simon Pilgrim4a2979c2016-12-06 17:00:47 +000073define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
74; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
75; X32: # BB#0:
76; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
77; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
78; X32-NEXT: vpmovsxwd %xmm0, %xmm0
79; X32-NEXT: retl
80;
81; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
82; X64: # BB#0:
83; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
84; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
85; X64-NEXT: vpmovsxwd %xmm0, %xmm0
86; X64-NEXT: retq
87 %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
88 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
89 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
90 %4 = sext <4 x i16> %3 to <4 x i32>
91 ret <4 x i32> %4
92}
93
; Masks lanes 2 and 3 of %a0 (and with 255 and 4085), duplicates them with the
; <2,2,3,3> shuffle and converts the result with uitofp.
; Expected code: vpand, vpshufd [2,2,3,3], then a single vcvtdq2ps.
Simon Pilgrim730f83a2016-10-15 19:29:26 +000094define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
95; X32-LABEL: knownbits_mask_shuffle_uitofp:
96; X32: # BB#0:
97; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
98; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
Simon Pilgrim75a697a2016-10-29 11:29:39 +000099; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
Simon Pilgrim730f83a2016-10-15 19:29:26 +0000100; X32-NEXT: retl
101;
102; X64-LABEL: knownbits_mask_shuffle_uitofp:
103; X64: # BB#0:
104; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
105; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
Simon Pilgrim75a697a2016-10-29 11:29:39 +0000106; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
Simon Pilgrim730f83a2016-10-15 19:29:26 +0000107; X64-NEXT: retq
108 %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
109 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
110 %3 = uitofp <4 x i32> %2 to <4 x float>
111 ret <4 x float> %3
112}
Simon Pilgrimc1041852016-11-06 16:05:59 +0000113
; Masks lanes 2 and 3 of %a0 (and with 255 and 4085), ORs every lane with
; 65535, duplicates lanes 2 and 3 with the <2,2,3,3> shuffle and converts with
; uitofp.
; Expected code: vpand, vpor, vpshufd [2,2,3,3], then a single vcvtdq2ps.
114define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
115; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
116; X32: # BB#0:
117; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
118; X32-NEXT: vpor {{\.LCPI.*}}, %xmm0, %xmm0
119; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
Simon Pilgrimdd4809a2016-11-06 16:29:09 +0000120; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
Simon Pilgrimc1041852016-11-06 16:05:59 +0000121; X32-NEXT: retl
122;
123; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
124; X64: # BB#0:
125; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
126; X64-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
127; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
Simon Pilgrimdd4809a2016-11-06 16:29:09 +0000128; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
Simon Pilgrimc1041852016-11-06 16:05:59 +0000129; X64-NEXT: retq
130 %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
131 %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
132 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
133 %4 = uitofp <4 x i32> %3 to <4 x float>
134 ret <4 x float> %4
135}
Simon Pilgrim3ac353c2016-11-06 16:36:29 +0000136
; Masks lanes 2 and 3 of %a0 (and with 255 and 4085), XORs every lane with
; 65535, duplicates lanes 2 and 3 with the <2,2,3,3> shuffle and converts with
; uitofp.
; Expected code: vpand, vpxor, vpshufd [2,2,3,3], then a single vcvtdq2ps.
137define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
138; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
139; X32: # BB#0:
140; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
141; X32-NEXT: vpxor {{\.LCPI.*}}, %xmm0, %xmm0
142; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
Simon Pilgrim39df78e2016-11-06 16:49:19 +0000143; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
Simon Pilgrim3ac353c2016-11-06 16:36:29 +0000144; X32-NEXT: retl
145;
146; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
147; X64: # BB#0:
148; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
149; X64-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
150; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
Simon Pilgrim39df78e2016-11-06 16:49:19 +0000151; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
Simon Pilgrim3ac353c2016-11-06 16:36:29 +0000152; X64-NEXT: retq
153 %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
154 %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
155 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
156 %4 = uitofp <4 x i32> %3 to <4 x float>
157 ret <4 x float> %4
158}
Simon Pilgrimede8ad72016-11-10 13:34:17 +0000159
; Clears the low 16 bits of lanes 0 and 3 (and with -65536), shifts left by 17,
; selects lanes <0,0,3,3> and shifts right by 15.
; With the low 16 bits already zero, the shift left by 17 leaves no set bits in
; the selected lanes, so the expected code is only a register zeroing (vxorps).
160define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
161; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
162; X32: # BB#0:
Simon Pilgrim3bf99c02016-11-10 13:52:42 +0000163; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrimede8ad72016-11-10 13:34:17 +0000164; X32-NEXT: retl
165;
166; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
167; X64: # BB#0:
Simon Pilgrim3bf99c02016-11-10 13:52:42 +0000168; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrimede8ad72016-11-10 13:34:17 +0000169; X64-NEXT: retq
170 %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
171 %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
172 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
173 %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
174 ret <4 x i32> %4
175}
Simon Pilgrim7be6d992016-11-10 14:46:24 +0000176
; Keeps only the low 17 bits of lanes 0 and 3 (and with 131071), arithmetic
; shifts right by 15, selects lanes <0,0,3,3> and logical shifts right by 30.
; After the mask the sign bit is clear and at most 2 bits survive the first
; shift, so the final shift by 30 yields zero; the expected code is only a
; register zeroing (vxorps).
177define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
178; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
179; X32: # BB#0:
Simon Pilgrimca57e532016-11-10 15:05:09 +0000180; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim7be6d992016-11-10 14:46:24 +0000181; X32-NEXT: retl
182;
183; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
184; X64: # BB#0:
Simon Pilgrimca57e532016-11-10 15:05:09 +0000185; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim7be6d992016-11-10 14:46:24 +0000186; X64-NEXT: retq
187 %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
188 %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
189 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
190 %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
191 ret <4 x i32> %4
192}
Simon Pilgrim2cf393c2016-11-10 15:57:33 +0000193
; Clears the low 16 bits of lanes 0 and 3 of %a0 (and with -65536), multiplies
; by %a1, selects lanes <0,0,3,3> and shifts left by 22.
; The product keeps the low 16 bits zero in the selected lanes, so shifting
; left by 22 leaves nothing; the expected code is only a register zeroing
; (vxorps).
194define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
195; X32-LABEL: knownbits_mask_mul_shuffle_shl:
196; X32: # BB#0:
Simon Pilgrimee187fd2016-11-10 16:27:42 +0000197; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim2cf393c2016-11-10 15:57:33 +0000198; X32-NEXT: retl
199;
200; X64-LABEL: knownbits_mask_mul_shuffle_shl:
201; X64: # BB#0:
Simon Pilgrimee187fd2016-11-10 16:27:42 +0000202; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim2cf393c2016-11-10 15:57:33 +0000203; X64-NEXT: retq
204 %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
205 %2 = mul <4 x i32> %a1, %1
206 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
207 %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
208 ret <4 x i32> %4
209}
Simon Pilgrime517f0a2016-11-10 17:24:33 +0000210
; Clears the low 16 bits of i64 lanes 0 and 3 (and with -65536), truncates the
; vector to <4 x i32>, selects lanes <0,0,3,3> and shifts left by 22.
; The truncated lanes still have their low 16 bits zero, so the shift left by
; 22 leaves nothing; the expected code is only a register zeroing (vxorps).
211define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
212; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
213; X32: # BB#0:
Simon Pilgrimd67af682016-11-10 17:43:52 +0000214; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrime517f0a2016-11-10 17:24:33 +0000215; X32-NEXT: retl
216;
217; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
218; X64: # BB#0:
Simon Pilgrimd67af682016-11-10 17:43:52 +0000219; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrime517f0a2016-11-10 17:24:33 +0000220; X64-NEXT: retq
221 %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
222 %2 = trunc <4 x i64> %1 to <4 x i32>
223 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
224 %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
225 ret <4 x i32> %4
226}
Simon Pilgrim7e0a4b82016-11-10 21:50:23 +0000227
; Masks lanes 0 and 3 of both %a0 and %a1 to their low 15 bits (and with
; 32767), adds the vectors, selects lanes <0,0,3,3> and shifts right by 17.
; The sum of two 15-bit values fits in 16 bits, so the shift right by 17 yields
; zero; the expected code is only a register zeroing (vxorps).
Simon Pilgrima0dee612016-11-10 22:34:12 +0000228define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
229; X32-LABEL: knownbits_mask_add_shuffle_lshr:
Simon Pilgrim8bbfaca2016-11-10 22:21:04 +0000230; X32: # BB#0:
Simon Pilgrim38f00452016-11-10 22:41:49 +0000231; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim8bbfaca2016-11-10 22:21:04 +0000232; X32-NEXT: retl
233;
Simon Pilgrima0dee612016-11-10 22:34:12 +0000234; X64-LABEL: knownbits_mask_add_shuffle_lshr:
Simon Pilgrim8bbfaca2016-11-10 22:21:04 +0000235; X64: # BB#0:
Simon Pilgrim38f00452016-11-10 22:41:49 +0000236; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim8bbfaca2016-11-10 22:21:04 +0000237; X64-NEXT: retq
Simon Pilgrima0dee612016-11-10 22:34:12 +0000238 %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
239 %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
Simon Pilgrim8bbfaca2016-11-10 22:21:04 +0000240 %3 = add <4 x i32> %1, %2
Simon Pilgrima0dee612016-11-10 22:34:12 +0000241 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
242 %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
243 ret <4 x i32> %5
Simon Pilgrim8bbfaca2016-11-10 22:21:04 +0000244}
245
; Masks lanes 0 and 3 of %a0 to their low 4 bits (and with 15), subtracts the
; vector from a splat of 255, selects lanes <0,0,3,3> and shifts right by 22.
; In the selected lanes the difference lies between 240 and 255, so the shift
; right by 22 yields zero; the expected code is only a register zeroing
; (vxorps).
Simon Pilgrima0dee612016-11-10 22:34:12 +0000246define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
247; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
Simon Pilgrim7e0a4b82016-11-10 21:50:23 +0000248; X32: # BB#0:
Simon Pilgrim38f00452016-11-10 22:41:49 +0000249; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim7e0a4b82016-11-10 21:50:23 +0000250; X32-NEXT: retl
251;
Simon Pilgrima0dee612016-11-10 22:34:12 +0000252; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
Simon Pilgrim7e0a4b82016-11-10 21:50:23 +0000253; X64: # BB#0:
Simon Pilgrim38f00452016-11-10 22:41:49 +0000254; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim7e0a4b82016-11-10 21:50:23 +0000255; X64-NEXT: retq
Simon Pilgrima0dee612016-11-10 22:34:12 +0000256 %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
Simon Pilgrim7e0a4b82016-11-10 21:50:23 +0000257 %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
Simon Pilgrima0dee612016-11-10 22:34:12 +0000258 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
259 %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
260 ret <4 x i32> %4
Simon Pilgrimda1a43e2016-11-11 10:39:15 +0000261}
Simon Pilgrim7e0a4b82016-11-10 21:50:23 +0000262
; Masks lanes 0 and 3 of %a0 to their low 15 bits (and with 32767), divides
; (unsigned) by %a1, selects lanes <0,0,3,3> and shifts right by 22.
; An unsigned quotient cannot exceed its 15-bit dividend, so the shift right by
; 22 yields zero; the expected code is only a register zeroing (vxorps).
Simon Pilgrimda1a43e2016-11-11 10:39:15 +0000263define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
264; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
265; X32: # BB#0:
Simon Pilgrim06522272016-11-11 10:47:24 +0000266; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrimda1a43e2016-11-11 10:39:15 +0000267; X32-NEXT: retl
268;
269; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
270; X64: # BB#0:
Simon Pilgrim06522272016-11-11 10:47:24 +0000271; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrimda1a43e2016-11-11 10:39:15 +0000272; X64-NEXT: retq
273 %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
274 %2 = udiv <4 x i32> %1, %a1
275 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
276 %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
277 ret <4 x i32> %4
Simon Pilgrim7e0a4b82016-11-10 21:50:23 +0000278}
Simon Pilgrim8bc531d2016-11-11 11:11:40 +0000279
; Takes the unsigned remainder of every lane of %a0 by 16 and shifts the
; result right by 22 (no mask and no shuffle in this test).
; The remainder is always below 16, so the shift yields zero; the expected code
; is only a register zeroing (vxorps).
280define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
281; X32-LABEL: knownbits_urem_lshr:
282; X32: # BB#0:
283; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
284; X32-NEXT: retl
285;
286; X64-LABEL: knownbits_urem_lshr:
287; X64: # BB#0:
288; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
289; X64-NEXT: retq
290 %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
291 %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
292 ret <4 x i32> %2
293}
294
; Masks lanes 0 and 3 of both %a0 and %a1 to their low 15 bits (and with
; 32767), takes the unsigned remainder %a0 urem %a1, selects lanes <0,0,3,3>
; and shifts right by 22.
; The remainder in the selected lanes cannot exceed 15 bits, so the shift
; yields zero; the expected code is only a register zeroing (vxorps).
295define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
296; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
297; X32: # BB#0:
Simon Pilgrim813721e2016-11-11 11:23:43 +0000298; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim8bc531d2016-11-11 11:11:40 +0000299; X32-NEXT: retl
300;
301; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
302; X64: # BB#0:
Simon Pilgrim813721e2016-11-11 11:23:43 +0000303; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim8bc531d2016-11-11 11:11:40 +0000304; X64-NEXT: retq
305 %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
306 %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
307 %3 = urem <4 x i32> %1, %2
308 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
309 %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
310 ret <4 x i32> %5
311}
312
; Clears the low 15 bits of lanes 0 and 3 of %a0 (and with -32768), takes the
; signed remainder by 16, selects lanes <0,0,3,3> and shifts right by 22.
; The masked lanes are multiples of 32768 and therefore of 16, so their
; remainder is zero; the expected code is only a register zeroing (vxorps).
313define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
314; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
315; X32: # BB#0:
Simon Pilgrim813721e2016-11-11 11:23:43 +0000316; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim8bc531d2016-11-11 11:11:40 +0000317; X32-NEXT: retl
318;
319; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
320; X64: # BB#0:
Simon Pilgrim813721e2016-11-11 11:23:43 +0000321; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim8bc531d2016-11-11 11:11:40 +0000322; X64-NEXT: retq
323 %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
324 %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
325 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
326 %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
327 ret <4 x i32> %4
328}
Simon Pilgrim08dedfc2016-11-11 11:33:21 +0000329
; Masks lanes 0 and 3 of %a0 to their low 15 bits (and with 32767), byte-swaps
; each lane with llvm.bswap.v4i32, selects lanes <0,0,3,3> and shifts left by
; 22.
; After the byte swap only the upper two bytes of the selected lanes can be
; non-zero, so the shift left by 22 leaves nothing; the expected code is only a
; register zeroing (vxorps).
330define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
331; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
332; X32: # BB#0:
Simon Pilgrim807f9cf2016-11-11 11:51:29 +0000333; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim08dedfc2016-11-11 11:33:21 +0000334; X32-NEXT: retl
335;
336; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
337; X64: # BB#0:
Simon Pilgrim807f9cf2016-11-11 11:51:29 +0000338; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
Simon Pilgrim08dedfc2016-11-11 11:33:21 +0000339; X64-NEXT: retq
340 %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
341 %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
342 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
343 %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
344 ret <4 x i32> %4
345}
346declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
Simon Pilgrim3a5328e2016-11-18 21:59:38 +0000347
; Masks lanes 0 and 2 of %a0 and lanes 1 and 3 of %a1 to their low 17 bits (and
; with 131071), then builds an 8-element vector from %a0[0,2,0,2] followed by
; %a1[1,3,1,3] (shuffle indices 5 and 7 address the second operand) and
; converts it with uitofp.
; Expected code: two vpand, two vpshufd, vinsertf128 to join the halves, then a
; single 256-bit vcvtdq2ps.
348define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
349; X32-LABEL: knownbits_mask_concat_uitofp:
350; X32: # BB#0:
351; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
352; X32-NEXT: vpand {{\.LCPI.*}}, %xmm1, %xmm1
353; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
354; X32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
Simon Pilgrim3a5328e2016-11-18 21:59:38 +0000355; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
356; X32-NEXT: vcvtdq2ps %ymm0, %ymm0
Simon Pilgrim3a5328e2016-11-18 21:59:38 +0000357; X32-NEXT: retl
358;
359; X64-LABEL: knownbits_mask_concat_uitofp:
360; X64: # BB#0:
361; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
362; X64-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
363; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
364; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
Simon Pilgrim3a5328e2016-11-18 21:59:38 +0000365; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
366; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
Simon Pilgrim3a5328e2016-11-18 21:59:38 +0000367; X64-NEXT: retq
368 %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
369 %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
370 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
371 %4 = uitofp <8 x i32> %3 to <8 x float>
372 ret <8 x float> %4
373}
Simon Pilgrim84b6f262016-11-25 15:07:15 +0000374
; Shifts each i64 lane of %a0 right by 1, bitcasts the result to <4 x i32>,
; selects i32 lanes <1,1,3,3> and converts with uitofp. The %a1 argument is not
; used by the body.
; Expected code: vpsrlq $1, vpshufd [1,1,3,3], then a vpblendw / vpsrld $16 /
; vpblendw / vaddps / vaddps sequence for the conversion.
; NOTE(review): the checks show the multi-instruction conversion rather than a
; single vcvtdq2ps; presumably the cleared top bit is not tracked through the
; bitcast at this revision -- confirm.
375define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
376; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
377; X32: # BB#0:
378; X32-NEXT: vpsrlq $1, %xmm0, %xmm0
379; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
380; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
381; X32-NEXT: vpsrld $16, %xmm0, %xmm0
382; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
383; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
384; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
385; X32-NEXT: retl
386;
387; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
388; X64: # BB#0:
389; X64-NEXT: vpsrlq $1, %xmm0, %xmm0
390; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
391; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
392; X64-NEXT: vpsrld $16, %xmm0, %xmm0
393; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
394; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
395; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
396; X64-NEXT: retq
397 %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
398 %2 = bitcast <2 x i64> %1 to <4 x i32>
399 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
400 %4 = uitofp <4 x i32> %3 to <4 x float>
401 ret <4 x float> %4
402}
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000403
; Applies the SSE4.1 signed-min intrinsic (pminsd) against <0,-65535,-65535,0>
; and then the signed-max intrinsic (pmaxsd) against <65535,-1,-1,131071>,
; selects lanes <0,0,3,3> and converts with uitofp.
; Expected code: vpminsd, vpmaxsd, vpshufd [0,0,3,3], then a vpblendw /
; vpsrld $16 / vpblendw / vaddps / vaddps sequence for the conversion.
; NOTE(review): the checks show the multi-instruction conversion rather than a
; single vcvtdq2ps; presumably known bits are not computed for these min/max
; intrinsics at this revision -- confirm.
404define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
405; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
406; X32: # BB#0:
407; X32-NEXT: vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
408; X32-NEXT: vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
409; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
410; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
411; X32-NEXT: vpsrld $16, %xmm0, %xmm0
412; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
413; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
414; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
415; X32-NEXT: retl
416;
417; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
418; X64: # BB#0:
419; X64-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
420; X64-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
421; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
422; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
423; X64-NEXT: vpsrld $16, %xmm0, %xmm0
424; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
425; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
426; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
427; X64-NEXT: retq
428 %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
429 %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
430 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
431 %4 = uitofp <4 x i32> %3 to <4 x float>
432 ret <4 x float> %4
433}
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000434declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
Simon Pilgrim7c7b6492016-12-06 15:17:50 +0000435declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000436
; Applies the SSE4.1 unsigned-max intrinsic (pmaxud) against <255,-1,-1,1023>
; and then the unsigned-min intrinsic (pminud) against <65535,-1,-1,262143>,
; selects lanes <0,0,3,3> and converts with uitofp.
; Expected code: vpmaxud, vpminud, vpshufd [0,0,3,3], then a vpblendw /
; vpsrld $16 / vpblendw / vaddps / vaddps sequence for the conversion.
; NOTE(review): the checks show the multi-instruction conversion rather than a
; single vcvtdq2ps; presumably known bits are not computed for these min/max
; intrinsics at this revision -- confirm.
Simon Pilgrim7c7b6492016-12-06 15:17:50 +0000437define <4 x float> @knownbits_umax_umin_shuffle_uitofp(<4 x i32> %a0) {
438; X32-LABEL: knownbits_umax_umin_shuffle_uitofp:
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000439; X32: # BB#0:
440; X32-NEXT: vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
Simon Pilgrim7c7b6492016-12-06 15:17:50 +0000441; X32-NEXT: vpminud {{\.LCPI.*}}, %xmm0, %xmm0
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000442; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
443; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
444; X32-NEXT: vpsrld $16, %xmm0, %xmm0
445; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
446; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
447; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
448; X32-NEXT: retl
449;
Simon Pilgrim7c7b6492016-12-06 15:17:50 +0000450; X64-LABEL: knownbits_umax_umin_shuffle_uitofp:
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000451; X64: # BB#0:
452; X64-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000453; X64-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000454; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
455; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
456; X64-NEXT: vpsrld $16, %xmm0, %xmm0
457; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
458; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
459; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
460; X64-NEXT: retq
Simon Pilgrim7c7b6492016-12-06 15:17:50 +0000461 %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
462 %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000463 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
464 %4 = uitofp <4 x i32> %3 to <4 x float>
465 ret <4 x float> %4
466}
Simon Pilgrim7c7b6492016-12-06 15:17:50 +0000467declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
Simon Pilgrimae63dd12016-12-06 12:12:20 +0000468declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone