; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2

; crc32 with an i32 accumulator and an i8 data operand. The intrinsic is called
; twice: first with the data byte passed as an argument (%a1), then with a byte
; loaded through %a2, so the expected output contains one register-operand and
; one memory-operand crc32b.
define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; GENERIC-LABEL: crc32_32_8:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    crc32b %sil, %edi
; GENERIC-NEXT:    crc32b (%rdx), %edi
; GENERIC-NEXT:    movl %edi, %eax
; GENERIC-NEXT:    retq
;
; SLM-LABEL: crc32_32_8:
; SLM:       # BB#0:
; SLM-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
; SLM-NEXT:    crc32b (%rdx), %edi # sched: [6:1.00]
; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: crc32_32_8:
; SANDY:       # BB#0:
; SANDY-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
; SANDY-NEXT:    crc32b (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_32_8:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
; HASWELL-NEXT:    crc32b (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: crc32_32_8:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
; BTVER2-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.17]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
  %2 = load i8, i8 *%a2
  %3 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %1, i8 %2)
  ret i32 %3
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind

; crc32 with an i32 accumulator and an i16 data operand. The first call takes
; the data from the %a1 argument and the second from a load through %a2, so the
; expected output contains a register-operand and a memory-operand crc32w.
define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; GENERIC-LABEL: crc32_32_16:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    crc32w %si, %edi
; GENERIC-NEXT:    crc32w (%rdx), %edi
; GENERIC-NEXT:    movl %edi, %eax
; GENERIC-NEXT:    retq
;
; SLM-LABEL: crc32_32_16:
; SLM:       # BB#0:
; SLM-NEXT:    crc32w %si, %edi # sched: [3:1.00]
; SLM-NEXT:    crc32w (%rdx), %edi # sched: [6:1.00]
; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: crc32_32_16:
; SANDY:       # BB#0:
; SANDY-NEXT:    crc32w %si, %edi # sched: [3:1.00]
; SANDY-NEXT:    crc32w (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_32_16:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    crc32w %si, %edi # sched: [3:1.00]
; HASWELL-NEXT:    crc32w (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: crc32_32_16:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    crc32w %si, %edi # sched: [3:1.00]
; BTVER2-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.17]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
  %2 = load i16, i16 *%a2
  %3 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %1, i16 %2)
  ret i32 %3
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind

; crc32 with an i32 accumulator and an i32 data operand. The first call takes
; the data from the %a1 argument and the second from a load through %a2, so the
; expected output contains a register-operand and a memory-operand crc32l.
define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; GENERIC-LABEL: crc32_32_32:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    crc32l %esi, %edi
; GENERIC-NEXT:    crc32l (%rdx), %edi
; GENERIC-NEXT:    movl %edi, %eax
; GENERIC-NEXT:    retq
;
; SLM-LABEL: crc32_32_32:
; SLM:       # BB#0:
; SLM-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
; SLM-NEXT:    crc32l (%rdx), %edi # sched: [6:1.00]
; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: crc32_32_32:
; SANDY:       # BB#0:
; SANDY-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
; SANDY-NEXT:    crc32l (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_32_32:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
; HASWELL-NEXT:    crc32l (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: crc32_32_32:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
; BTVER2-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.17]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
  %2 = load i32, i32 *%a2
  %3 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %1, i32 %2)
  ret i32 %3
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind

; crc32 with an i64 accumulator and an i8 data operand. The first call takes
; the data byte from the %a1 argument and the second from a load through %a2.
; The recorded assembly uses crc32b with a 32-bit destination register (%edi)
; and then copies the full %rdi into %rax.
define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
; GENERIC-LABEL: crc32_64_8:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    crc32b %sil, %edi
; GENERIC-NEXT:    crc32b (%rdx), %edi
; GENERIC-NEXT:    movq %rdi, %rax
; GENERIC-NEXT:    retq
;
; SLM-LABEL: crc32_64_8:
; SLM:       # BB#0:
; SLM-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
; SLM-NEXT:    crc32b (%rdx), %edi # sched: [6:1.00]
; SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: crc32_64_8:
; SANDY:       # BB#0:
; SANDY-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
; SANDY-NEXT:    crc32b (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_64_8:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
; HASWELL-NEXT:    crc32b (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: crc32_64_8:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
; BTVER2-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.17]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1)
  %2 = load i8, i8 *%a2
  %3 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %1, i8 %2)
  ret i64 %3
}
declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind

; crc32 with an i64 accumulator and an i64 data operand. The first call takes
; the data from the %a1 argument and the second from a load through %a2, so the
; expected output contains a register-operand and a memory-operand crc32q.
define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
; GENERIC-LABEL: crc32_64_64:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    crc32q %rsi, %rdi
; GENERIC-NEXT:    crc32q (%rdx), %rdi
; GENERIC-NEXT:    movq %rdi, %rax
; GENERIC-NEXT:    retq
;
; SLM-LABEL: crc32_64_64:
; SLM:       # BB#0:
; SLM-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
; SLM-NEXT:    crc32q (%rdx), %rdi # sched: [6:1.00]
; SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: crc32_64_64:
; SANDY:       # BB#0:
; SANDY-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
; SANDY-NEXT:    crc32q (%rdx), %rdi # sched: [7:1.00]
; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_64_64:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
; HASWELL-NEXT:    crc32q (%rdx), %rdi # sched: [7:1.00]
; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: crc32_64_64:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
; BTVER2-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.17]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
  %2 = load i64, i64 *%a2
  %3 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %1, i64 %2)
  ret i64 %3
}
declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind

; pcmpestri in register and memory-operand form. Both calls compare %a0 with
; explicit lengths of 7 and control byte 7; the second vector operand is %a1 in
; the first call and a 16-byte-aligned load through %a2 in the second. The two
; i32 results are added and returned. The recorded instruction order differs
; between CPU models (position of the `movl %ecx, %esi` copy), so each block of
; assertions below must be kept in its own order.
define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpestri:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    movl $7, %eax
; GENERIC-NEXT:    movl $7, %edx
; GENERIC-NEXT:    pcmpestri $7, %xmm1, %xmm0
; GENERIC-NEXT:    movl %ecx, %esi
; GENERIC-NEXT:    movl $7, %eax
; GENERIC-NEXT:    movl $7, %edx
; GENERIC-NEXT:    pcmpestri $7, (%rdi), %xmm0
; GENERIC-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; GENERIC-NEXT:    leal (%rcx,%rsi), %eax
; GENERIC-NEXT:    retq
;
; SLM-LABEL: test_pcmpestri:
; SLM:       # BB#0:
; SLM-NEXT:    movl $7, %eax # sched: [1:0.50]
; SLM-NEXT:    movl $7, %edx # sched: [1:0.50]
; SLM-NEXT:    pcmpestri $7, %xmm1, %xmm0 # sched: [21:21.00]
; SLM-NEXT:    movl $7, %eax # sched: [1:0.50]
; SLM-NEXT:    movl $7, %edx # sched: [1:0.50]
; SLM-NEXT:    movl %ecx, %esi # sched: [1:0.50]
; SLM-NEXT:    pcmpestri $7, (%rdi), %xmm0 # sched: [21:21.00]
; SLM-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; SLM-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_pcmpestri:
; SANDY:       # BB#0:
; SANDY-NEXT:    movl $7, %eax # sched: [1:0.33]
; SANDY-NEXT:    movl $7, %edx # sched: [1:0.33]
; SANDY-NEXT:    vpcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
; SANDY-NEXT:    movl %ecx, %esi # sched: [1:0.33]
; SANDY-NEXT:    movl $7, %eax # sched: [1:0.33]
; SANDY-NEXT:    movl $7, %edx # sched: [1:0.33]
; SANDY-NEXT:    vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
; SANDY-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; SANDY-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpestri:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT:    movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT:    vpcmpestri $7, %xmm1, %xmm0 # sched: [11:3.00]
; HASWELL-NEXT:    movl %ecx, %esi # sched: [1:0.25]
; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT:    movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT:    vpcmpestri $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; HASWELL-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_pcmpestri:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    movl $7, %eax # sched: [1:0.17]
; BTVER2-NEXT:    movl $7, %edx # sched: [1:0.17]
; BTVER2-NEXT:    vpcmpestri $7, %xmm1, %xmm0 # sched: [13:2.50]
; BTVER2-NEXT:    movl $7, %eax # sched: [1:0.17]
; BTVER2-NEXT:    movl $7, %edx # sched: [1:0.17]
; BTVER2-NEXT:    movl %ecx, %esi # sched: [1:0.17]
; BTVER2-NEXT:    vpcmpestri $7, (%rdi), %xmm0 # sched: [18:2.50]
; BTVER2-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; BTVER2-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
  %3 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %2, i32 7, i8 7)
  %4 = add i32 %1, %3
  ret i32 %4
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; pcmpestrm in register and memory-operand form, with explicit lengths of 7 and
; control byte 7. The calls are chained: the mask produced by the first call
; (%a0 against %a1) is the first operand of the second call, whose other
; operand is a 16-byte-aligned load through %a2.
define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpestrm:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    movl $7, %eax
; GENERIC-NEXT:    movl $7, %edx
; GENERIC-NEXT:    pcmpestrm $7, %xmm1, %xmm0
; GENERIC-NEXT:    movl $7, %eax
; GENERIC-NEXT:    movl $7, %edx
; GENERIC-NEXT:    pcmpestrm $7, (%rdi), %xmm0
; GENERIC-NEXT:    retq
;
; SLM-LABEL: test_pcmpestrm:
; SLM:       # BB#0:
; SLM-NEXT:    movl $7, %eax # sched: [1:0.50]
; SLM-NEXT:    movl $7, %edx # sched: [1:0.50]
; SLM-NEXT:    pcmpestrm $7, %xmm1, %xmm0 # sched: [17:17.00]
; SLM-NEXT:    movl $7, %eax # sched: [1:0.50]
; SLM-NEXT:    movl $7, %edx # sched: [1:0.50]
; SLM-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [17:17.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_pcmpestrm:
; SANDY:       # BB#0:
; SANDY-NEXT:    movl $7, %eax # sched: [1:0.33]
; SANDY-NEXT:    movl $7, %edx # sched: [1:0.33]
; SANDY-NEXT:    vpcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
; SANDY-NEXT:    movl $7, %eax # sched: [1:0.33]
; SANDY-NEXT:    movl $7, %edx # sched: [1:0.33]
; SANDY-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpestrm:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT:    movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT:    vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00]
; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT:    movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [10:3.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_pcmpestrm:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    movl $7, %eax # sched: [1:0.17]
; BTVER2-NEXT:    movl $7, %edx # sched: [1:0.17]
; BTVER2-NEXT:    vpcmpestrm $7, %xmm1, %xmm0 # sched: [13:2.50]
; BTVER2-NEXT:    movl $7, %eax # sched: [1:0.17]
; BTVER2-NEXT:    movl $7, %edx # sched: [1:0.17]
; BTVER2-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [18:2.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
  %3 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7)
  ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; pcmpistri in register and memory-operand form with control byte 7. Both calls
; use %a0 as the first operand; the second operand is %a1 in the first call and
; a 16-byte-aligned load through %a2 in the second. The two i32 results are
; added and returned.
define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpistri:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    pcmpistri $7, %xmm1, %xmm0
; GENERIC-NEXT:    movl %ecx, %eax
; GENERIC-NEXT:    pcmpistri $7, (%rdi), %xmm0
; GENERIC-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; GENERIC-NEXT:    leal (%rcx,%rax), %eax
; GENERIC-NEXT:    retq
;
; SLM-LABEL: test_pcmpistri:
; SLM:       # BB#0:
; SLM-NEXT:    pcmpistri $7, %xmm1, %xmm0 # sched: [17:17.00]
; SLM-NEXT:    movl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT:    pcmpistri $7, (%rdi), %xmm0 # sched: [17:17.00]
; SLM-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; SLM-NEXT:    leal (%rcx,%rax), %eax # sched: [1:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_pcmpistri:
; SANDY:       # BB#0:
; SANDY-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    movl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT:    vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; SANDY-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpistri:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
; HASWELL-NEXT:    movl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT:    vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; HASWELL-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_pcmpistri:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    movl %ecx, %eax # sched: [1:0.17]
; BTVER2-NEXT:    vpcmpistri $7, (%rdi), %xmm0 # sched: [11:1.00]
; BTVER2-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
; BTVER2-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
  %3 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %2, i8 7)
  %4 = add i32 %1, %3
  ret i32 %4
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; pcmpistrm in register and memory-operand form with control byte 7. The calls
; are chained: the mask produced by the first call (%a0 against %a1) is the
; first operand of the second call, whose other operand is a 16-byte-aligned
; load through %a2.
define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpistrm:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    pcmpistrm $7, %xmm1, %xmm0
; GENERIC-NEXT:    pcmpistrm $7, (%rdi), %xmm0
; GENERIC-NEXT:    retq
;
; SLM-LABEL: test_pcmpistrm:
; SLM:       # BB#0:
; SLM-NEXT:    pcmpistrm $7, %xmm1, %xmm0 # sched: [13:13.00]
; SLM-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [13:13.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_pcmpistrm:
; SANDY:       # BB#0:
; SANDY-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00]
; SANDY-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpistrm:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
; HASWELL-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_pcmpistrm:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [12:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
  %3 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %1, <16 x i8> %2, i8 7)
  ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; pcmpgtq in register and memory-operand form. No intrinsic is used: the IR is
; a signed greater-than compare on <2 x i64> followed by a sign-extension of
; the <2 x i1> result, which the recorded assembly shows as (v)pcmpgtq. The
; first compare is %a0 against %a1; its sign-extended result is then compared
; against a 16-byte-aligned load through %a2.
define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; GENERIC-LABEL: test_pcmpgtq:
; GENERIC:       # BB#0:
; GENERIC-NEXT:    pcmpgtq %xmm1, %xmm0
; GENERIC-NEXT:    pcmpgtq (%rdi), %xmm0
; GENERIC-NEXT:    retq
;
; SLM-LABEL: test_pcmpgtq:
; SLM:       # BB#0:
; SLM-NEXT:    pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_pcmpgtq:
; SANDY:       # BB#0:
; SANDY-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT:    retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpgtq:
; HASWELL:       # BB#0:
; HASWELL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_pcmpgtq:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
  %1 = icmp sgt <2 x i64> %a0, %a1
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = load <2 x i64>, <2 x i64>*%a2, align 16
  %4 = icmp sgt <2 x i64> %2, %3
  %5 = sext <2 x i1> %4 to <2 x i64>
  ret <2 x i64> %5
}