; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP

; ============================================================================ ;
; Various cases with %x and/or %y being a constant
; ============================================================================ ;

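; Throughout this file, the "out" forms compute the masked merge directly as
; (%x & %mask) | (%y & ~%mask), while the "in" forms compute the equivalent
; ((%x ^ %y) & %mask) ^ %y, with %x and/or %y replaced by the constants
; all-ones (-1) or 42. The *_invmask variants swap %mask and its complement.
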
define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andps (%rsi), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT: pand (%rdi), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT: vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT: andnps (%rcx), %xmm0
; CHECK-SSE1-NEXT: xorps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pandn (%rdx), %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpandn (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andnps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vandnps (%rdi), %xmm0, %xmm1
; CHECK-XOP-NEXT: vorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm2
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT: andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm2
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm2
; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm2
; CHECK-XOP-NEXT: vpandn %xmm2, %xmm0, %xmm0
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_42:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm0 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andps (%rcx), %xmm1
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm0
; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andps (%rdx), %xmm0
; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_42:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps {{.*#+}} xmm0 = [42,42,42,42]
; CHECK-XOP-NEXT: vxorps (%rdi), %xmm0, %xmm1
; CHECK-XOP-NEXT: vandps (%rdx), %xmm1, %xmm1
; CHECK-XOP-NEXT: vxorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm2
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT: andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm2
; CHECK-SSE2-NEXT: xorps %xmm1, %xmm2
; CHECK-SSE2-NEXT: andnps %xmm2, %xmm0
; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vxorps (%rdi), %xmm1, %xmm2
; CHECK-XOP-NEXT: vandnps %xmm2, %xmm0, %xmm0
; CHECK-XOP-NEXT: vxorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rcx), %xmm1
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andnps (%rdx), %xmm0
; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
; CHECK-XOP-NEXT: vandnps (%rdx), %xmm0, %xmm1
; CHECK-XOP-NEXT: vxorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT: pand (%rsi), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT: vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm1
; CHECK-SSE1-NEXT: xorps {{.*}}(%rip), %xmm1
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm2
; CHECK-SSE1-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm2
; CHECK-SSE1-NEXT: movaps %xmm2, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rsi), %xmm1
; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; CHECK-SSE2-NEXT: pxor (%rdx), %xmm2
; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm1
; CHECK-XOP-NEXT: vpandn %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andps (%rcx), %xmm1
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42]
; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andps (%rdx), %xmm0
; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
; CHECK-XOP-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm1
; CHECK-XOP-NEXT: vandps (%rdx), %xmm1, %xmm1
; CHECK-XOP-NEXT: vxorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm1
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT: andps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm1
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm2 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm2
; CHECK-SSE1-NEXT: andnps %xmm2, %xmm1
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps {{.*#+}} xmm2 = [42,42,42,42]
; CHECK-SSE2-NEXT: xorps %xmm1, %xmm2
; CHECK-SSE2-NEXT: andnps %xmm2, %xmm0
; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm1
; CHECK-XOP-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm2
; CHECK-XOP-NEXT: vandnps %xmm2, %xmm1, %xmm1
; CHECK-XOP-NEXT: vxorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}