; blob: 20c3ef8ea88d6204fa73ddf91e80308450c7daca
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP

; ============================================================================ ;
; Various cases with %x and/or %y being a constant
; ============================================================================ ;

; "Out" form of a masked merge where the %y operand is replaced by an all-ones
; splat:  r = (mask & x) | (~mask & -1).
; %x, %y and %mask are loaded through the pointer arguments; the %y load is
; not used by the computation.
define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andps (%rsi), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT: pand (%rdi), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT: vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "In" (xor-and-xor) form of a masked merge where the %y operand is replaced by
; an all-ones splat:  r = ((x ^ -1) & mask) ^ -1.
; %x, %y and %mask are loaded through the pointer arguments; the %y load is
; not used by the computation.
define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT: andnps (%rcx), %xmm0
; CHECK-SSE1-NEXT: xorps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT: pand (%rdi), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT: vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "Out" form with the mask inverted and the %y operand replaced by an all-ones
; splat:  r = (~mask & x) | (mask & -1).
; The %y load is not used by the computation.
define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andnps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vandnps (%rdi), %xmm0, %xmm1
; CHECK-XOP-NEXT: vorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "In" (xor-and-xor) form with the mask inverted and the %y operand replaced by
; an all-ones splat:  r = ((x ^ -1) & ~mask) ^ -1.
; The %y load is not used by the computation.
; NOTE(review): the SSE2 and XOP assertions below XOR the freshly materialized
; all-ones register with itself before combining it with the mask, i.e. the
; (-1 ^ -1) is not folded away. Presumably this records the codegen at the time
; the assertions were generated; regenerate with update_llc_test_checks.py
; rather than hand-editing if that output changes.
define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm2
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT: andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm1
; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT: pandn (%rdi), %xmm1
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpandn (%rdi), %xmm0, %xmm2
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor %xmm0, %xmm2, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

; "Out" form of a masked merge where the %y operand is replaced by a splat of
; 42:  r = (mask & x) | (~mask & 42).
; The %y load is not used by the computation.
define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_42:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "In" (xor-and-xor) form of a masked merge where the %y operand is replaced by
; a splat of 42:  r = ((x ^ 42) & mask) ^ 42.
; The %y load is not used by the computation.
define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_42:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "Out" form with the mask inverted and the %y operand replaced by a splat of
; 42:  r = (~mask & x) | (mask & 42).
; The %y load is not used by the computation.
define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "In" (xor-and-xor) form with the mask inverted and the %y operand replaced by
; a splat of 42:  r = ((x ^ 42) & ~mask) ^ 42.
; The %y load is not used by the computation.
define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

; "Out" form of a masked merge where the %x operand is replaced by an all-ones
; splat:  r = (mask & -1) | (~mask & y).
; The %x load is not used by the computation.
define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "In" (xor-and-xor) form of a masked merge where the %x operand is replaced by
; an all-ones splat:  r = ((-1 ^ y) & mask) ^ y.
; The %x load is not used by the computation.
define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "Out" form with the mask inverted and the %x operand replaced by an all-ones
; splat:  r = (~mask & -1) | (mask & y).
; The %x load is not used by the computation.
define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT: pand (%rsi), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT: vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "In" (xor-and-xor) form with the mask inverted and the %x operand replaced by
; an all-ones splat:  r = ((-1 ^ y) & ~mask) ^ y.
; The %x load is not used by the computation.
define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT: pand (%rsi), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT: vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; "Out" form of a masked merge where the %x operand is replaced by a splat of
; 42:  r = (mask & 42) | (~mask & y).
; The %x load is not used by the computation.
define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "In" (xor-and-xor) form of a masked merge where the %x operand is replaced by
; a splat of 42:  r = ((42 ^ y) & mask) ^ y.
; The %x load is not used by the computation.
define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "Out" form with the mask inverted and the %x operand replaced by a splat of
; 42:  r = (~mask & 42) | (mask & y).
; The %x load is not used by the computation.
define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm1
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT: andps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "In" (xor-and-xor) form with the mask inverted and the %x operand replaced by
; a splat of 42:  r = ((42 ^ y) & ~mask) ^ y.
; The %x load is not used by the computation.
define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}