; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; Masking the low element with 15 proves the extracted i16 is non-negative,
; so the sext is folded away (no movswl/cwtl expected in the checks).
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpextrw $0, %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpextrw $0, %xmm0, %eax
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

; Masking with 65535 proves the extracted i64 fits in 16 bits, so the
; uitofp can be lowered as a plain signed conversion (cvtsi2ss / fild).
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32: # BB#0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $16, %esp
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: fildll {{[0-9]+}}(%esp)
; X32-NEXT: fstps {{[0-9]+}}(%esp)
; X32-NEXT: flds {{[0-9]+}}(%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64: # BB#0:
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
; X64-NEXT: retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

; Elements 0 and 2 are zero-extended i16 inserts, then splat-shuffled, so
; every lane of the uitofp input is known to fit in 16 bits.
define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X32-LABEL: knownbits_insert_uitofp:
; X32: # BB#0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vpsrld $16, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64: # BB#0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: movzwl %si, %ecx
; X64-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
; X64-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vpsrld $16, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32> %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

; The shuffled high half is masked to 15, so sign-extension becomes a
; zero-extension (punpckhwd with zero instead of pmovsxwd).
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Same as knownbits_mask_shuffle_sext but through a pair of shuffles;
; known bits must be tracked across both.
define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

; Variant where lane 3 of the first shuffle is undef: its known bits are
; not provably non-negative, so the sext (vpmovsxwd) must remain.
define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT: vpmovsxwd %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

; Shuffled lanes are masked below 2^31, so uitofp lowers to the cheap
; signed vcvtdq2ps.
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

; Known bits survive an OR: the masked-then-or'd lanes stay below 2^31,
; so the unsigned conversion still becomes vcvtdq2ps.
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; Same pattern as the OR test but through XOR: known-zero high bits are
; preserved, so uitofp again lowers to vcvtdq2ps.
define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpxor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; mask+shl leaves the low 15 result bits known zero in the shuffled lanes,
; so the final lshr by 15 folds the whole function to zero.
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

; After masking to 17 bits and ashr by 15, only 2 low bits can be set in
; the shuffled lanes; lshr by 30 therefore yields all zeros.
define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

; The mul by values with low 16 bits masked off keeps low bits zero in the
; shuffled lanes; shl by 22 pushes everything out, folding to zero.
define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; Known bits tracked through a trunc from i64: the shuffled lanes keep
; their low bits zero, so the shl result folds to zero.
define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; Sum of two 15-bit values fits in 16 bits in the shuffled lanes, so
; lshr by 17 is known zero.
define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

; 255 - (0..15) stays within 8 bits in the shuffled lanes, so lshr by 22
; is known zero.
define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; udiv cannot increase a 15-bit dividend, so the shuffled lanes still fit
; in 15 bits and lshr by 22 folds to zero.
define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; urem by 16 leaves at most 4 low bits set; lshr by 22 is known zero.
define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

; The remainder of two 15-bit values fits in 15 bits in the shuffled
; lanes, so lshr by 22 folds to zero.
define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

; srem by 16 of the masked lanes leaves a small-magnitude result whose
; bits 22+ are known zero after the shuffle, folding lshr to zero.
define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; bswap of 15-bit values has its low bits known zero; after the shuffle,
; shl by 22 discards the remaining set bits, folding to zero.
define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

; Every lane selected into the concat is masked to 17 bits, so the 256-bit
; uitofp lowers to the signed vcvtdq2ps.
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm1, %xmm1
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT: vcvtdq2ps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

; The shuffle selects the high i32 halves of the i64 lshr result; their
; top bit may be set, so the full unsigned-conversion sequence remains.
define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpsrlq $1, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vpsrld $16, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpsrlq $1, %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vpsrld $16, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; smin/smax clamp lanes 0 and 3 to a range that still allows bit 31-ish
; values, so the general unsigned-conversion sequence is kept.
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vpsrld $16, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vpsrld $16, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

; umax/umin clamp the selected lanes to [255, 262143]; the conversion
; sequence here is what the test pins down for unsigned min/max inputs.
define <4 x float> @knownbits_umax_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_umin_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpminud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vpsrld $16, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_umax_umin_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vpsrld $16, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
  %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone