; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP

; ============================================================================ ;
; Various cases with %x and/or %y being a constant
; ============================================================================ ;

; "Out" form of the masked merge with %y replaced by the constant -1:
;   r = (mask & x) | (~mask & -1)
; %y is loaded but never used; the load only keeps the same function shape as
; the other tests in this file.
define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rdi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "In" form of the masked merge with %y replaced by the constant -1:
;   r = ((x ^ -1) & mask) ^ -1
; %y is loaded but never used; the load only keeps the same function shape as
; the other tests in this file.
define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    andnps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    xorps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pandn (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpandn (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; the constant takes the place of %y
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "Out" form with an inverted mask and %y replaced by the constant -1:
;   r = (~mask & x) | (mask & -1)
; %y is loaded but never used.
define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rdi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "In" form with an inverted mask and %y replaced by the constant -1:
;   r = ((x ^ -1) & ~mask) ^ -1
; %y is loaded but never used.
define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm2
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT:    andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm2
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor (%rdx), %xmm1, %xmm2
; CHECK-XOP-NEXT:    vpandn %xmm2, %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; the constant takes the place of %y
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

; "Out" form of the masked merge with %y replaced by the constant 42:
;   r = (mask & x) | (~mask & 42)
; %y is loaded but never used; the load only keeps the same function shape as
; the other tests in this file.
define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "In" form of the masked merge with %y replaced by the constant 42:
;   r = ((x ^ 42) & mask) ^ 42
; %y is loaded but never used; the load only keeps the same function shape as
; the other tests in this file.
define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; the constant takes the place of %y
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "Out" form with an inverted mask and %y replaced by the constant 42:
;   r = (~mask & x) | (mask & 42)
; %y is loaded but never used.
define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "In" form with an inverted mask and %y replaced by the constant 42:
;   r = ((x ^ 42) & ~mask) ^ 42
; %y is loaded but never used.
define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; the constant takes the place of %y
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

; "Out" form of the masked merge with %x replaced by the constant -1:
;   r = (mask & -1) | (~mask & y)
; %x is loaded but never used; the load only keeps the same function shape as
; the other tests in this file.
define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "In" form of the masked merge with %x replaced by the constant -1:
;   r = ((-1 ^ y) & mask) ^ y
; %x is loaded but never used; the load only keeps the same function shape as
; the other tests in this file.
define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; the constant takes the place of %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "Out" form with an inverted mask and %x replaced by the constant -1:
;   r = (~mask & -1) | (mask & y)
; %x is loaded but never used.
define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "In" form with an inverted mask and %x replaced by the constant -1:
;   r = ((-1 ^ y) & ~mask) ^ y
; %x is loaded but never used.
define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; the constant takes the place of %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; "Out" form of the masked merge with %x replaced by the constant 42:
;   r = (mask & 42) | (~mask & y)
; %x is loaded but never used; the load only keeps the same function shape as
; the other tests in this file.
define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [5.88545355E-44,5.88545355E-44,5.88545355E-44,5.88545355E-44]
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "In" form of the masked merge with %x replaced by the constant 42:
;   r = ((42 ^ y) & mask) ^ y
; %x is loaded but never used; the load only keeps the same function shape as
; the other tests in this file.
define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; the constant takes the place of %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "Out" form with an inverted mask and %x replaced by the constant 42:
;   r = (~mask & 42) | (mask & y)
; %x is loaded but never used.
define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "In" form with an inverted mask and %x replaced by the constant 42:
;   r = ((42 ^ y) & ~mask) ^ y
; %x is loaded but never used.
define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; the constant takes the place of %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}