; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
7
8; This tests codegen time inlining/optimization of memcmp
9; rdar://6480398
10
; 64 digit characters plus NUL; serves as the constant operand in the *_eq_const tests.
11@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
12
; Library comparison routines; the backend expands fixed-size calls to them inline.
13declare i32 @memcmp(i8*, i8*, i64)
14declare i32 @bcmp(i8*, i8*, i64)
15
; memcmp(X, Y, 2): expanded inline as byte-swapped (big-endian) 16-bit loads and a subtract.
16define i32 @length2(i8* %X, i8* %Y) nounwind !prof !14 {
17; X86-LABEL: length2:
18; X86: # %bb.0:
19; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
20; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
21; X86-NEXT: movzwl (%ecx), %ecx
22; X86-NEXT: movzwl (%eax), %edx
23; X86-NEXT: rolw $8, %cx
24; X86-NEXT: rolw $8, %dx
25; X86-NEXT: movzwl %cx, %eax
26; X86-NEXT: movzwl %dx, %ecx
27; X86-NEXT: subl %ecx, %eax
28; X86-NEXT: retl
29;
30; X64-LABEL: length2:
31; X64: # %bb.0:
32; X64-NEXT: movzwl (%rdi), %eax
33; X64-NEXT: movzwl (%rsi), %ecx
34; X64-NEXT: rolw $8, %ax
35; X64-NEXT: rolw $8, %cx
36; X64-NEXT: movzwl %ax, %eax
37; X64-NEXT: movzwl %cx, %ecx
38; X64-NEXT: subl %ecx, %eax
39; X64-NEXT: retq
40 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
41 ret i32 %m
42}
43
; memcmp(X, Y, 2) == 0: equality-only compare needs no bswap; a single 16-bit cmp/sete.
44define i1 @length2_eq(i8* %X, i8* %Y) nounwind !prof !14 {
45; X86-LABEL: length2_eq:
46; X86: # %bb.0:
47; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
48; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
49; X86-NEXT: movzwl (%ecx), %ecx
50; X86-NEXT: cmpw (%eax), %cx
51; X86-NEXT: sete %al
52; X86-NEXT: retl
53;
54; X64-LABEL: length2_eq:
55; X64: # %bb.0:
56; X64-NEXT: movzwl (%rdi), %eax
57; X64-NEXT: cmpw (%rsi), %ax
58; X64-NEXT: sete %al
59; X64-NEXT: retq
60 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
61 %c = icmp ne i32 %m, 0
62 ret i1 %c
63}
64
; memcmp(X, "12", 2) != 0: the 2-byte constant folds into an immediate compare (0x3231 = "12").
65define i1 @length2_eq_const(i8* %X) nounwind !prof !14 {
66; X86-LABEL: length2_eq_const:
67; X86: # %bb.0:
68; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
69; X86-NEXT: movzwl (%eax), %eax
70; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
71; X86-NEXT: setne %al
72; X86-NEXT: retl
73;
74; X64-LABEL: length2_eq_const:
75; X64: # %bb.0:
76; X64-NEXT: movzwl (%rdi), %eax
77; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
78; X64-NEXT: setne %al
79; X64-NEXT: retq
80 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
81 %c = icmp ne i32 %m, 0
82 ret i1 %c
83}
84
; Same as length2_eq, but the nobuiltin attribute on the call suppresses inline
; expansion: a real libcall to memcmp must be emitted on both targets.
85define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind !prof !14 {
86; X86-LABEL: length2_eq_nobuiltin_attr:
87; X86: # %bb.0:
88; X86-NEXT: pushl $0
89; X86-NEXT: pushl $2
90; X86-NEXT: pushl {{[0-9]+}}(%esp)
91; X86-NEXT: pushl {{[0-9]+}}(%esp)
92; X86-NEXT: calll memcmp
93; X86-NEXT: addl $16, %esp
94; X86-NEXT: testl %eax, %eax
95; X86-NEXT: sete %al
96; X86-NEXT: retl
97;
98; X64-LABEL: length2_eq_nobuiltin_attr:
99; X64: # %bb.0:
100; X64-NEXT: pushq %rax
101; X64-NEXT: movl $2, %edx
102; X64-NEXT: callq memcmp
103; X64-NEXT: testl %eax, %eax
104; X64-NEXT: sete %al
105; X64-NEXT: popq %rcx
106; X64-NEXT: retq
107 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind nobuiltin
108 %c = icmp eq i32 %m, 0
109 ret i1 %c
110}
111
; memcmp(X, Y, 3): two-block expansion — bswapped 16-bit compare, then the trailing
; byte; a mismatch branches to res_block which materializes -1/+1 via setae/lea.
112define i32 @length3(i8* %X, i8* %Y) nounwind !prof !14 {
113; X86-LABEL: length3:
114; X86: # %bb.0: # %loadbb
115; X86-NEXT: pushl %esi
116; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
117; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
118; X86-NEXT: movzwl (%eax), %edx
119; X86-NEXT: movzwl (%ecx), %esi
120; X86-NEXT: rolw $8, %dx
121; X86-NEXT: rolw $8, %si
122; X86-NEXT: cmpw %si, %dx
123; X86-NEXT: jne .LBB4_1
124; X86-NEXT: # %bb.2: # %loadbb1
125; X86-NEXT: movzbl 2(%eax), %eax
126; X86-NEXT: movzbl 2(%ecx), %ecx
127; X86-NEXT: subl %ecx, %eax
128; X86-NEXT: jmp .LBB4_3
129; X86-NEXT: .LBB4_1: # %res_block
130; X86-NEXT: setae %al
131; X86-NEXT: movzbl %al, %eax
132; X86-NEXT: leal -1(%eax,%eax), %eax
133; X86-NEXT: .LBB4_3: # %endblock
134; X86-NEXT: popl %esi
135; X86-NEXT: retl
136;
137; X64-LABEL: length3:
138; X64: # %bb.0: # %loadbb
139; X64-NEXT: movzwl (%rdi), %eax
140; X64-NEXT: movzwl (%rsi), %ecx
141; X64-NEXT: rolw $8, %ax
142; X64-NEXT: rolw $8, %cx
143; X64-NEXT: cmpw %cx, %ax
144; X64-NEXT: jne .LBB4_1
145; X64-NEXT: # %bb.2: # %loadbb1
146; X64-NEXT: movzbl 2(%rdi), %eax
147; X64-NEXT: movzbl 2(%rsi), %ecx
148; X64-NEXT: subl %ecx, %eax
149; X64-NEXT: retq
150; X64-NEXT: .LBB4_1: # %res_block
151; X64-NEXT: setae %al
152; X64-NEXT: movzbl %al, %eax
153; X64-NEXT: leal -1(%rax,%rax), %eax
154; X64-NEXT: retq
155 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
156 ret i32 %m
157}
158
; memcmp(X, Y, 3) != 0: branchless equality — xor of the 16-bit halves and of the
; trailing bytes, OR-ed together, then setne.
159define i1 @length3_eq(i8* %X, i8* %Y) nounwind !prof !14 {
160; X86-LABEL: length3_eq:
161; X86: # %bb.0:
162; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
163; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
164; X86-NEXT: movzwl (%ecx), %edx
165; X86-NEXT: xorw (%eax), %dx
166; X86-NEXT: movb 2(%ecx), %cl
167; X86-NEXT: xorb 2(%eax), %cl
168; X86-NEXT: movzbl %cl, %eax
169; X86-NEXT: orw %dx, %ax
170; X86-NEXT: setne %al
171; X86-NEXT: retl
172;
173; X64-LABEL: length3_eq:
174; X64: # %bb.0:
175; X64-NEXT: movzwl (%rdi), %eax
176; X64-NEXT: xorw (%rsi), %ax
177; X64-NEXT: movb 2(%rdi), %cl
178; X64-NEXT: xorb 2(%rsi), %cl
179; X64-NEXT: movzbl %cl, %ecx
180; X64-NEXT: orw %ax, %cx
181; X64-NEXT: setne %al
182; X64-NEXT: retq
183 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
184 %c = icmp ne i32 %m, 0
185 ret i1 %c
186}
187
; memcmp(X, Y, 4): single bswapped 32-bit compare; the -1/0/+1 result comes from
; the branch-free seta/sbb idiom.
188define i32 @length4(i8* %X, i8* %Y) nounwind !prof !14 {
189; X86-LABEL: length4:
190; X86: # %bb.0:
191; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
192; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
193; X86-NEXT: movl (%ecx), %ecx
194; X86-NEXT: movl (%eax), %edx
195; X86-NEXT: bswapl %ecx
196; X86-NEXT: bswapl %edx
197; X86-NEXT: xorl %eax, %eax
198; X86-NEXT: cmpl %edx, %ecx
199; X86-NEXT: seta %al
200; X86-NEXT: sbbl $0, %eax
201; X86-NEXT: retl
202;
203; X64-LABEL: length4:
204; X64: # %bb.0:
205; X64-NEXT: movl (%rdi), %ecx
206; X64-NEXT: movl (%rsi), %edx
207; X64-NEXT: bswapl %ecx
208; X64-NEXT: bswapl %edx
209; X64-NEXT: xorl %eax, %eax
210; X64-NEXT: cmpl %edx, %ecx
211; X64-NEXT: seta %al
212; X64-NEXT: sbbl $0, %eax
213; X64-NEXT: retq
214 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
215 ret i32 %m
216}
217
; memcmp(X, Y, 4) != 0: equality-only, so one 32-bit cmp/setne with no bswap.
218define i1 @length4_eq(i8* %X, i8* %Y) nounwind !prof !14 {
219; X86-LABEL: length4_eq:
220; X86: # %bb.0:
221; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
222; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
223; X86-NEXT: movl (%ecx), %ecx
224; X86-NEXT: cmpl (%eax), %ecx
225; X86-NEXT: setne %al
226; X86-NEXT: retl
227;
228; X64-LABEL: length4_eq:
229; X64: # %bb.0:
230; X64-NEXT: movl (%rdi), %eax
231; X64-NEXT: cmpl (%rsi), %eax
232; X64-NEXT: setne %al
233; X64-NEXT: retq
234 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
235 %c = icmp ne i32 %m, 0
236 ret i1 %c
237}
238
; memcmp(X, "1234", 4) == 0: folds to a single memory-immediate compare (0x34333231 = "1234").
239define i1 @length4_eq_const(i8* %X) nounwind !prof !14 {
240; X86-LABEL: length4_eq_const:
241; X86: # %bb.0:
242; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
243; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
244; X86-NEXT: sete %al
245; X86-NEXT: retl
246;
247; X64-LABEL: length4_eq_const:
248; X64: # %bb.0:
249; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
250; X64-NEXT: sete %al
251; X64-NEXT: retq
252 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
253 %c = icmp eq i32 %m, 0
254 ret i1 %c
255}
256
; memcmp(X, Y, 5): two-block expansion — bswapped 32-bit compare, then the trailing
; byte; mismatch path produces -1/+1 in res_block.
257define i32 @length5(i8* %X, i8* %Y) nounwind !prof !14 {
258; X86-LABEL: length5:
259; X86: # %bb.0: # %loadbb
260; X86-NEXT: pushl %esi
261; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
262; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
263; X86-NEXT: movl (%eax), %edx
264; X86-NEXT: movl (%ecx), %esi
265; X86-NEXT: bswapl %edx
266; X86-NEXT: bswapl %esi
267; X86-NEXT: cmpl %esi, %edx
268; X86-NEXT: jne .LBB9_1
269; X86-NEXT: # %bb.2: # %loadbb1
270; X86-NEXT: movzbl 4(%eax), %eax
271; X86-NEXT: movzbl 4(%ecx), %ecx
272; X86-NEXT: subl %ecx, %eax
273; X86-NEXT: jmp .LBB9_3
274; X86-NEXT: .LBB9_1: # %res_block
275; X86-NEXT: setae %al
276; X86-NEXT: movzbl %al, %eax
277; X86-NEXT: leal -1(%eax,%eax), %eax
278; X86-NEXT: .LBB9_3: # %endblock
279; X86-NEXT: popl %esi
280; X86-NEXT: retl
281;
282; X64-LABEL: length5:
283; X64: # %bb.0: # %loadbb
284; X64-NEXT: movl (%rdi), %eax
285; X64-NEXT: movl (%rsi), %ecx
286; X64-NEXT: bswapl %eax
287; X64-NEXT: bswapl %ecx
288; X64-NEXT: cmpl %ecx, %eax
289; X64-NEXT: jne .LBB9_1
290; X64-NEXT: # %bb.2: # %loadbb1
291; X64-NEXT: movzbl 4(%rdi), %eax
292; X64-NEXT: movzbl 4(%rsi), %ecx
293; X64-NEXT: subl %ecx, %eax
294; X64-NEXT: retq
295; X64-NEXT: .LBB9_1: # %res_block
296; X64-NEXT: setae %al
297; X64-NEXT: movzbl %al, %eax
298; X64-NEXT: leal -1(%rax,%rax), %eax
299; X64-NEXT: retq
300 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
301 ret i32 %m
302}
303
; memcmp(X, Y, 5) != 0: branchless — xor 32-bit halves and trailing bytes, OR, setne.
304define i1 @length5_eq(i8* %X, i8* %Y) nounwind !prof !14 {
305; X86-LABEL: length5_eq:
306; X86: # %bb.0:
307; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
308; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
309; X86-NEXT: movl (%ecx), %edx
310; X86-NEXT: xorl (%eax), %edx
311; X86-NEXT: movb 4(%ecx), %cl
312; X86-NEXT: xorb 4(%eax), %cl
313; X86-NEXT: movzbl %cl, %eax
314; X86-NEXT: orl %edx, %eax
315; X86-NEXT: setne %al
316; X86-NEXT: retl
317;
318; X64-LABEL: length5_eq:
319; X64: # %bb.0:
320; X64-NEXT: movl (%rdi), %eax
321; X64-NEXT: xorl (%rsi), %eax
322; X64-NEXT: movb 4(%rdi), %cl
323; X64-NEXT: xorb 4(%rsi), %cl
324; X64-NEXT: movzbl %cl, %ecx
325; X64-NEXT: orl %eax, %ecx
326; X64-NEXT: setne %al
327; X64-NEXT: retq
328 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
329 %c = icmp ne i32 %m, 0
330 ret i1 %c
331}
332
; memcmp(X, Y, 8): x86-32 needs two bswapped 32-bit compares with a shared res_block;
; x86-64 does it in one bswapped 64-bit compare plus seta/sbb.
333define i32 @length8(i8* %X, i8* %Y) nounwind !prof !14 {
334; X86-LABEL: length8:
335; X86: # %bb.0:
336; X86-NEXT: pushl %esi
337; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
338; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
339; X86-NEXT: movl (%esi), %ecx
340; X86-NEXT: movl (%eax), %edx
341; X86-NEXT: bswapl %ecx
342; X86-NEXT: bswapl %edx
343; X86-NEXT: cmpl %edx, %ecx
344; X86-NEXT: jne .LBB11_2
345; X86-NEXT: # %bb.1: # %loadbb1
346; X86-NEXT: movl 4(%esi), %ecx
347; X86-NEXT: movl 4(%eax), %edx
348; X86-NEXT: bswapl %ecx
349; X86-NEXT: bswapl %edx
350; X86-NEXT: xorl %eax, %eax
351; X86-NEXT: cmpl %edx, %ecx
352; X86-NEXT: je .LBB11_3
353; X86-NEXT: .LBB11_2: # %res_block
354; X86-NEXT: xorl %eax, %eax
355; X86-NEXT: cmpl %edx, %ecx
356; X86-NEXT: setae %al
357; X86-NEXT: leal -1(%eax,%eax), %eax
358; X86-NEXT: .LBB11_3: # %endblock
359; X86-NEXT: popl %esi
360; X86-NEXT: retl
361;
362; X64-LABEL: length8:
363; X64: # %bb.0:
364; X64-NEXT: movq (%rdi), %rcx
365; X64-NEXT: movq (%rsi), %rdx
366; X64-NEXT: bswapq %rcx
367; X64-NEXT: bswapq %rdx
368; X64-NEXT: xorl %eax, %eax
369; X64-NEXT: cmpq %rdx, %rcx
370; X64-NEXT: seta %al
371; X64-NEXT: sbbl $0, %eax
372; X64-NEXT: retq
373 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
374 ret i32 %m
375}
376
; memcmp(X, Y, 8) == 0: x86-32 xor/or-combines two 32-bit words; x86-64 is one 64-bit cmp.
377define i1 @length8_eq(i8* %X, i8* %Y) nounwind !prof !14 {
378; X86-LABEL: length8_eq:
379; X86: # %bb.0:
380; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
381; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
382; X86-NEXT: movl (%ecx), %edx
383; X86-NEXT: movl 4(%ecx), %ecx
384; X86-NEXT: xorl (%eax), %edx
385; X86-NEXT: xorl 4(%eax), %ecx
386; X86-NEXT: orl %edx, %ecx
387; X86-NEXT: sete %al
388; X86-NEXT: retl
389;
390; X64-LABEL: length8_eq:
391; X64: # %bb.0:
392; X64-NEXT: movq (%rdi), %rax
393; X64-NEXT: cmpq (%rsi), %rax
394; X64-NEXT: sete %al
395; X64-NEXT: retq
396 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
397 %c = icmp eq i32 %m, 0
398 ret i1 %c
399}
400
; memcmp(X, "01234567", 8) != 0: the constant becomes immediates ("0123"/"4567" on
; x86-32, one movabs of "01234567" on x86-64).
401define i1 @length8_eq_const(i8* %X) nounwind !prof !14 {
402; X86-LABEL: length8_eq_const:
403; X86: # %bb.0:
404; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
405; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130
406; X86-NEXT: xorl (%eax), %ecx
407; X86-NEXT: movl $926299444, %edx # imm = 0x37363534
408; X86-NEXT: xorl 4(%eax), %edx
409; X86-NEXT: orl %ecx, %edx
410; X86-NEXT: setne %al
411; X86-NEXT: retl
412;
413; X64-LABEL: length8_eq_const:
414; X64: # %bb.0:
415; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
416; X64-NEXT: cmpq %rax, (%rdi)
417; X64-NEXT: setne %al
418; X64-NEXT: retq
419 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
420 %c = icmp ne i32 %m, 0
421 ret i1 %c
422}
423
; memcmp(X, Y, 12) != 0: x86-32 falls back to a libcall; x86-64 combines a 64-bit
; and a 32-bit xor via orq.
424define i1 @length12_eq(i8* %X, i8* %Y) nounwind !prof !14 {
425; X86-LABEL: length12_eq:
426; X86: # %bb.0:
427; X86-NEXT: pushl $0
428; X86-NEXT: pushl $12
429; X86-NEXT: pushl {{[0-9]+}}(%esp)
430; X86-NEXT: pushl {{[0-9]+}}(%esp)
431; X86-NEXT: calll memcmp
432; X86-NEXT: addl $16, %esp
433; X86-NEXT: testl %eax, %eax
434; X86-NEXT: setne %al
435; X86-NEXT: retl
436;
437; X64-LABEL: length12_eq:
438; X64: # %bb.0:
439; X64-NEXT: movq (%rdi), %rax
440; X64-NEXT: xorq (%rsi), %rax
441; X64-NEXT: movl 8(%rdi), %ecx
442; X64-NEXT: xorl 8(%rsi), %ecx
443; X64-NEXT: orq %rax, %rcx
444; X64-NEXT: setne %al
445; X64-NEXT: retq
446 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
447 %c = icmp ne i32 %m, 0
448 ret i1 %c
449}
450
; memcmp(X, Y, 12) ordered result: libcall on x86-32; on x86-64 a bswapped 64-bit
; block then a bswapped 32-bit block sharing res_block.
451define i32 @length12(i8* %X, i8* %Y) nounwind !prof !14 {
452; X86-LABEL: length12:
453; X86: # %bb.0:
454; X86-NEXT: pushl $0
455; X86-NEXT: pushl $12
456; X86-NEXT: pushl {{[0-9]+}}(%esp)
457; X86-NEXT: pushl {{[0-9]+}}(%esp)
458; X86-NEXT: calll memcmp
459; X86-NEXT: addl $16, %esp
460; X86-NEXT: retl
461;
462; X64-LABEL: length12:
463; X64: # %bb.0:
464; X64-NEXT: movq (%rdi), %rcx
465; X64-NEXT: movq (%rsi), %rdx
466; X64-NEXT: bswapq %rcx
467; X64-NEXT: bswapq %rdx
468; X64-NEXT: cmpq %rdx, %rcx
469; X64-NEXT: jne .LBB15_2
470; X64-NEXT: # %bb.1: # %loadbb1
471; X64-NEXT: movl 8(%rdi), %ecx
472; X64-NEXT: movl 8(%rsi), %edx
473; X64-NEXT: bswapl %ecx
474; X64-NEXT: bswapl %edx
475; X64-NEXT: xorl %eax, %eax
476; X64-NEXT: cmpq %rdx, %rcx
477; X64-NEXT: je .LBB15_3
478; X64-NEXT: .LBB15_2: # %res_block
479; X64-NEXT: xorl %eax, %eax
480; X64-NEXT: cmpq %rdx, %rcx
481; X64-NEXT: setae %al
482; X64-NEXT: leal -1(%rax,%rax), %eax
483; X64-NEXT: .LBB15_3: # %endblock
484; X64-NEXT: retq
485 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
486 ret i32 %m
487}
488
489; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
490
; memcmp(X, Y, 16) ordered result (PR33329): libcall on x86-32; two bswapped 64-bit
; compare blocks on x86-64.
491define i32 @length16(i8* %X, i8* %Y) nounwind !prof !14 {
492; X86-LABEL: length16:
493; X86: # %bb.0:
494; X86-NEXT: pushl $0
495; X86-NEXT: pushl $16
496; X86-NEXT: pushl {{[0-9]+}}(%esp)
497; X86-NEXT: pushl {{[0-9]+}}(%esp)
498; X86-NEXT: calll memcmp
499; X86-NEXT: addl $16, %esp
500; X86-NEXT: retl
501;
502; X64-LABEL: length16:
503; X64: # %bb.0:
504; X64-NEXT: movq (%rdi), %rcx
505; X64-NEXT: movq (%rsi), %rdx
506; X64-NEXT: bswapq %rcx
507; X64-NEXT: bswapq %rdx
508; X64-NEXT: cmpq %rdx, %rcx
509; X64-NEXT: jne .LBB16_2
510; X64-NEXT: # %bb.1: # %loadbb1
511; X64-NEXT: movq 8(%rdi), %rcx
512; X64-NEXT: movq 8(%rsi), %rdx
513; X64-NEXT: bswapq %rcx
514; X64-NEXT: bswapq %rdx
515; X64-NEXT: xorl %eax, %eax
516; X64-NEXT: cmpq %rdx, %rcx
517; X64-NEXT: je .LBB16_3
518; X64-NEXT: .LBB16_2: # %res_block
519; X64-NEXT: xorl %eax, %eax
520; X64-NEXT: cmpq %rdx, %rcx
521; X64-NEXT: setae %al
522; X64-NEXT: leal -1(%rax,%rax), %eax
523; X64-NEXT: .LBB16_3: # %endblock
524; X64-NEXT: retq
525 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
526 ret i32 %m
527}
528
; memcmp(x, y, 16) != 0: libcall without SSE; with SSE2 a pcmpeqb/pmovmskb mask
; compare against 0xFFFF; with AVX a vpxor + vptest.
529define i1 @length16_eq(i8* %x, i8* %y) nounwind !prof !14 {
530; X86-NOSSE-LABEL: length16_eq:
531; X86-NOSSE: # %bb.0:
532; X86-NOSSE-NEXT: pushl $0
533; X86-NOSSE-NEXT: pushl $16
534; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
535; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
536; X86-NOSSE-NEXT: calll memcmp
537; X86-NOSSE-NEXT: addl $16, %esp
538; X86-NOSSE-NEXT: testl %eax, %eax
539; X86-NOSSE-NEXT: setne %al
540; X86-NOSSE-NEXT: retl
541;
542; X86-SSE2-LABEL: length16_eq:
543; X86-SSE2: # %bb.0:
544; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
545; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
546; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
547; X86-SSE2-NEXT: movdqu (%eax), %xmm1
548; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
549; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
550; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
551; X86-SSE2-NEXT: setne %al
552; X86-SSE2-NEXT: retl
553;
554; X64-SSE2-LABEL: length16_eq:
555; X64-SSE2: # %bb.0:
556; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
557; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
558; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
559; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
560; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
561; X64-SSE2-NEXT: setne %al
562; X64-SSE2-NEXT: retq
563;
564; X64-AVX-LABEL: length16_eq:
565; X64-AVX: # %bb.0:
566; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
567; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
568; X64-AVX-NEXT: vptest %xmm0, %xmm0
569; X64-AVX-NEXT: setne %al
570; X64-AVX-NEXT: retq
571 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
572 %cmp = icmp ne i32 %call, 0
573 ret i1 %cmp
574}
575
; memcmp(X, @.str, 16) == 0: like length16_eq but the second operand comes from a
; constant-pool vector (pcmpeqb/vpxor against a memory constant).
576define i1 @length16_eq_const(i8* %X) nounwind !prof !14 {
577; X86-NOSSE-LABEL: length16_eq_const:
578; X86-NOSSE: # %bb.0:
579; X86-NOSSE-NEXT: pushl $0
580; X86-NOSSE-NEXT: pushl $16
581; X86-NOSSE-NEXT: pushl $.L.str
582; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
583; X86-NOSSE-NEXT: calll memcmp
584; X86-NOSSE-NEXT: addl $16, %esp
585; X86-NOSSE-NEXT: testl %eax, %eax
586; X86-NOSSE-NEXT: sete %al
587; X86-NOSSE-NEXT: retl
588;
589; X86-SSE2-LABEL: length16_eq_const:
590; X86-SSE2: # %bb.0:
591; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
592; X86-SSE2-NEXT: movdqu (%eax), %xmm0
593; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
594; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
595; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
596; X86-SSE2-NEXT: sete %al
597; X86-SSE2-NEXT: retl
598;
599; X64-SSE2-LABEL: length16_eq_const:
600; X64-SSE2: # %bb.0:
601; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
602; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
603; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
604; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
605; X64-SSE2-NEXT: sete %al
606; X64-SSE2-NEXT: retq
607;
608; X64-AVX-LABEL: length16_eq_const:
609; X64-AVX: # %bb.0:
610; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
611; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
612; X64-AVX-NEXT: vptest %xmm0, %xmm0
613; X64-AVX-NEXT: sete %al
614; X64-AVX-NEXT: retq
615 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
616 %c = icmp eq i32 %m, 0
617 ret i1 %c
618}
619
620; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
621
; memcmp(X, Y, 24) ordered result (PR33914): no inline expansion for the ordered
; form — libcall on x86-32, tail-call jmp on x86-64.
622define i32 @length24(i8* %X, i8* %Y) nounwind !prof !14 {
623; X86-LABEL: length24:
624; X86: # %bb.0:
625; X86-NEXT: pushl $0
626; X86-NEXT: pushl $24
627; X86-NEXT: pushl {{[0-9]+}}(%esp)
628; X86-NEXT: pushl {{[0-9]+}}(%esp)
629; X86-NEXT: calll memcmp
630; X86-NEXT: addl $16, %esp
631; X86-NEXT: retl
632;
633; X64-LABEL: length24:
634; X64: # %bb.0:
635; X64-NEXT: movl $24, %edx
636; X64-NEXT: jmp memcmp # TAILCALL
637 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
638 ret i32 %m
639}
640
; memcmp(x, y, 24) == 0: SSE2 overlaps two 16-byte compares (x86-32) or uses a
; 16-byte plus 8-byte (movq) compare (x86-64); AVX xor/or-combines then vptest.
641define i1 @length24_eq(i8* %x, i8* %y) nounwind !prof !14 {
642; X86-NOSSE-LABEL: length24_eq:
643; X86-NOSSE: # %bb.0:
644; X86-NOSSE-NEXT: pushl $0
645; X86-NOSSE-NEXT: pushl $24
646; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
647; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
648; X86-NOSSE-NEXT: calll memcmp
649; X86-NOSSE-NEXT: addl $16, %esp
650; X86-NOSSE-NEXT: testl %eax, %eax
651; X86-NOSSE-NEXT: sete %al
652; X86-NOSSE-NEXT: retl
653;
654; X86-SSE2-LABEL: length24_eq:
655; X86-SSE2: # %bb.0:
656; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
657; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
658; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
659; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1
660; X86-SSE2-NEXT: movdqu (%eax), %xmm2
661; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
662; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0
663; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
664; X86-SSE2-NEXT: pand %xmm2, %xmm0
665; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
666; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
667; X86-SSE2-NEXT: sete %al
668; X86-SSE2-NEXT: retl
669;
670; X64-SSE2-LABEL: length24_eq:
671; X64-SSE2: # %bb.0:
672; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
673; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
674; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
675; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
676; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
677; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
678; X64-SSE2-NEXT: pand %xmm1, %xmm2
679; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
680; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
681; X64-SSE2-NEXT: sete %al
682; X64-SSE2-NEXT: retq
683;
684; X64-AVX-LABEL: length24_eq:
685; X64-AVX: # %bb.0:
686; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
687; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
688; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
689; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
690; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
691; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
692; X64-AVX-NEXT: vptest %xmm0, %xmm0
693; X64-AVX-NEXT: sete %al
694; X64-AVX-NEXT: retq
695 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
696 %cmp = icmp eq i32 %call, 0
697 ret i1 %cmp
698}
699
; memcmp(X, @.str, 24) != 0: same shapes as length24_eq, with the second operand
; folded to constant-pool vectors.
700define i1 @length24_eq_const(i8* %X) nounwind !prof !14 {
701; X86-NOSSE-LABEL: length24_eq_const:
702; X86-NOSSE: # %bb.0:
703; X86-NOSSE-NEXT: pushl $0
704; X86-NOSSE-NEXT: pushl $24
705; X86-NOSSE-NEXT: pushl $.L.str
706; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
707; X86-NOSSE-NEXT: calll memcmp
708; X86-NOSSE-NEXT: addl $16, %esp
709; X86-NOSSE-NEXT: testl %eax, %eax
710; X86-NOSSE-NEXT: setne %al
711; X86-NOSSE-NEXT: retl
712;
713; X86-SSE2-LABEL: length24_eq_const:
714; X86-SSE2: # %bb.0:
715; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
716; X86-SSE2-NEXT: movdqu (%eax), %xmm0
717; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1
718; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
719; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
720; X86-SSE2-NEXT: pand %xmm1, %xmm0
721; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
722; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
723; X86-SSE2-NEXT: setne %al
724; X86-SSE2-NEXT: retl
725;
726; X64-SSE2-LABEL: length24_eq_const:
727; X64-SSE2: # %bb.0:
728; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
729; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
730; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1
731; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
732; X64-SSE2-NEXT: pand %xmm1, %xmm0
733; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
734; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
735; X64-SSE2-NEXT: setne %al
736; X64-SSE2-NEXT: retq
737;
738; X64-AVX-LABEL: length24_eq_const:
739; X64-AVX: # %bb.0:
740; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
741; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
742; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1
743; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
744; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
745; X64-AVX-NEXT: vptest %xmm0, %xmm0
746; X64-AVX-NEXT: setne %al
747; X64-AVX-NEXT: retq
748 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
749 %c = icmp ne i32 %m, 0
750 ret i1 %c
751}
752
; memcmp(X, Y, 32) ordered result: not expanded — libcall on x86-32, tail call on x86-64.
753define i32 @length32(i8* %X, i8* %Y) nounwind !prof !14 {
754; X86-LABEL: length32:
755; X86: # %bb.0:
756; X86-NEXT: pushl $0
757; X86-NEXT: pushl $32
758; X86-NEXT: pushl {{[0-9]+}}(%esp)
759; X86-NEXT: pushl {{[0-9]+}}(%esp)
760; X86-NEXT: calll memcmp
761; X86-NEXT: addl $16, %esp
762; X86-NEXT: retl
763;
764; X64-LABEL: length32:
765; X64: # %bb.0:
766; X64-NEXT: movl $32, %edx
767; X64-NEXT: jmp memcmp # TAILCALL
768 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
769 ret i32 %m
770}
771
772; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
773
; memcmp(x, y, 32) == 0 (PR33325): SSE2 uses two pcmpeqb+pand; AVX1/AVX2 do a single
; 32-byte ymm xor + vptest (note the required vzeroupper before ret).
774define i1 @length32_eq(i8* %x, i8* %y) nounwind !prof !14 {
775; X86-NOSSE-LABEL: length32_eq:
776; X86-NOSSE: # %bb.0:
777; X86-NOSSE-NEXT: pushl $0
778; X86-NOSSE-NEXT: pushl $32
779; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
780; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
781; X86-NOSSE-NEXT: calll memcmp
782; X86-NOSSE-NEXT: addl $16, %esp
783; X86-NOSSE-NEXT: testl %eax, %eax
784; X86-NOSSE-NEXT: sete %al
785; X86-NOSSE-NEXT: retl
786;
787; X86-SSE2-LABEL: length32_eq:
788; X86-SSE2: # %bb.0:
789; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
790; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
791; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
792; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
793; X86-SSE2-NEXT: movdqu (%eax), %xmm2
794; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
795; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
796; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
797; X86-SSE2-NEXT: pand %xmm2, %xmm0
798; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
799; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
800; X86-SSE2-NEXT: sete %al
801; X86-SSE2-NEXT: retl
802;
803; X64-SSE2-LABEL: length32_eq:
804; X64-SSE2: # %bb.0:
805; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
806; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
807; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
808; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
809; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
810; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
811; X64-SSE2-NEXT: pand %xmm2, %xmm0
812; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
813; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
814; X64-SSE2-NEXT: sete %al
815; X64-SSE2-NEXT: retq
816;
817; X64-AVX1-LABEL: length32_eq:
818; X64-AVX1: # %bb.0:
819; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
820; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
821; X64-AVX1-NEXT: vptest %ymm0, %ymm0
822; X64-AVX1-NEXT: sete %al
823; X64-AVX1-NEXT: vzeroupper
824; X64-AVX1-NEXT: retq
825;
826; X64-AVX2-LABEL: length32_eq:
827; X64-AVX2: # %bb.0:
828; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
829; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
830; X64-AVX2-NEXT: vptest %ymm0, %ymm0
831; X64-AVX2-NEXT: sete %al
832; X64-AVX2-NEXT: vzeroupper
833; X64-AVX2-NEXT: retq
834 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
835 %cmp = icmp eq i32 %call, 0
836 ret i1 %cmp
837}
838
; memcmp(X, @.str, 32) != 0: same shapes as length32_eq with constant-pool operands.
839define i1 @length32_eq_const(i8* %X) nounwind !prof !14 {
840; X86-NOSSE-LABEL: length32_eq_const:
841; X86-NOSSE: # %bb.0:
842; X86-NOSSE-NEXT: pushl $0
843; X86-NOSSE-NEXT: pushl $32
844; X86-NOSSE-NEXT: pushl $.L.str
845; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
846; X86-NOSSE-NEXT: calll memcmp
847; X86-NOSSE-NEXT: addl $16, %esp
848; X86-NOSSE-NEXT: testl %eax, %eax
849; X86-NOSSE-NEXT: setne %al
850; X86-NOSSE-NEXT: retl
851;
852; X86-SSE2-LABEL: length32_eq_const:
853; X86-SSE2: # %bb.0:
854; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
855; X86-SSE2-NEXT: movdqu (%eax), %xmm0
856; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
857; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
858; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
859; X86-SSE2-NEXT: pand %xmm1, %xmm0
860; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
861; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
862; X86-SSE2-NEXT: setne %al
863; X86-SSE2-NEXT: retl
864;
865; X64-SSE2-LABEL: length32_eq_const:
866; X64-SSE2: # %bb.0:
867; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
868; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
869; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1
870; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
871; X64-SSE2-NEXT: pand %xmm1, %xmm0
872; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
873; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
874; X64-SSE2-NEXT: setne %al
875; X64-SSE2-NEXT: retq
876;
877; X64-AVX1-LABEL: length32_eq_const:
878; X64-AVX1: # %bb.0:
879; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
880; X64-AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
881; X64-AVX1-NEXT: vptest %ymm0, %ymm0
882; X64-AVX1-NEXT: setne %al
883; X64-AVX1-NEXT: vzeroupper
884; X64-AVX1-NEXT: retq
885;
886; X64-AVX2-LABEL: length32_eq_const:
887; X64-AVX2: # %bb.0:
888; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
889; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
890; X64-AVX2-NEXT: vptest %ymm0, %ymm0
891; X64-AVX2-NEXT: setne %al
892; X64-AVX2-NEXT: vzeroupper
893; X64-AVX2-NEXT: retq
894 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
895 %c = icmp ne i32 %m, 0
896 ret i1 %c
897}
898
; memcmp(X, Y, 64) ordered result: always a call — pushes + calll on x86-32,
; tail-call jmp on x86-64.
899define i32 @length64(i8* %X, i8* %Y) nounwind !prof !14 {
900; X86-LABEL: length64:
901; X86: # %bb.0:
902; X86-NEXT: pushl $0
903; X86-NEXT: pushl $64
904; X86-NEXT: pushl {{[0-9]+}}(%esp)
905; X86-NEXT: pushl {{[0-9]+}}(%esp)
906; X86-NEXT: calll memcmp
907; X86-NEXT: addl $16, %esp
908; X86-NEXT: retl
909;
910; X64-LABEL: length64:
911; X64: # %bb.0:
912; X64-NEXT: movl $64, %edx
913; X64-NEXT: jmp memcmp # TAILCALL
914 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
915 ret i32 %m
916}
917
; memcmp(x, y, 64) != 0: libcall without AVX; with AVX1/AVX2 two 32-byte ymm xors
; OR-combined and tested with vptest.
918define i1 @length64_eq(i8* %x, i8* %y) nounwind !prof !14 {
919; X86-LABEL: length64_eq:
920; X86: # %bb.0:
921; X86-NEXT: pushl $0
922; X86-NEXT: pushl $64
923; X86-NEXT: pushl {{[0-9]+}}(%esp)
924; X86-NEXT: pushl {{[0-9]+}}(%esp)
925; X86-NEXT: calll memcmp
926; X86-NEXT: addl $16, %esp
927; X86-NEXT: testl %eax, %eax
928; X86-NEXT: setne %al
929; X86-NEXT: retl
930;
931; X64-SSE2-LABEL: length64_eq:
932; X64-SSE2: # %bb.0:
933; X64-SSE2-NEXT: pushq %rax
934; X64-SSE2-NEXT: movl $64, %edx
935; X64-SSE2-NEXT: callq memcmp
936; X64-SSE2-NEXT: testl %eax, %eax
937; X64-SSE2-NEXT: setne %al
938; X64-SSE2-NEXT: popq %rcx
939; X64-SSE2-NEXT: retq
940;
941; X64-AVX1-LABEL: length64_eq:
942; X64-AVX1: # %bb.0:
943; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
944; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
945; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
946; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
947; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
948; X64-AVX1-NEXT: vptest %ymm0, %ymm0
949; X64-AVX1-NEXT: setne %al
950; X64-AVX1-NEXT: vzeroupper
951; X64-AVX1-NEXT: retq
952;
953; X64-AVX2-LABEL: length64_eq:
954; X64-AVX2: # %bb.0:
955; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
956; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
957; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
958; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
959; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
960; X64-AVX2-NEXT: vptest %ymm0, %ymm0
961; X64-AVX2-NEXT: setne %al
962; X64-AVX2-NEXT: vzeroupper
963; X64-AVX2-NEXT: retq
964 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
965 %cmp = icmp ne i32 %call, 0
966 ret i1 %cmp
967}
968
; memcmp(X, @.str, 64) == 0: same shapes as length64_eq with constant-pool operands.
969define i1 @length64_eq_const(i8* %X) nounwind !prof !14 {
970; X86-LABEL: length64_eq_const:
971; X86: # %bb.0:
972; X86-NEXT: pushl $0
973; X86-NEXT: pushl $64
974; X86-NEXT: pushl $.L.str
975; X86-NEXT: pushl {{[0-9]+}}(%esp)
976; X86-NEXT: calll memcmp
977; X86-NEXT: addl $16, %esp
978; X86-NEXT: testl %eax, %eax
979; X86-NEXT: sete %al
980; X86-NEXT: retl
981;
982; X64-SSE2-LABEL: length64_eq_const:
983; X64-SSE2: # %bb.0:
984; X64-SSE2-NEXT: pushq %rax
985; X64-SSE2-NEXT: movl $.L.str, %esi
986; X64-SSE2-NEXT: movl $64, %edx
987; X64-SSE2-NEXT: callq memcmp
988; X64-SSE2-NEXT: testl %eax, %eax
989; X64-SSE2-NEXT: sete %al
990; X64-SSE2-NEXT: popq %rcx
991; X64-SSE2-NEXT: retq
992;
993; X64-AVX1-LABEL: length64_eq_const:
994; X64-AVX1: # %bb.0:
995; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
996; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
997; X64-AVX1-NEXT: vxorps {{.*}}(%rip), %ymm1, %ymm1
998; X64-AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
999; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1000; X64-AVX1-NEXT: vptest %ymm0, %ymm0
1001; X64-AVX1-NEXT: sete %al
1002; X64-AVX1-NEXT: vzeroupper
1003; X64-AVX1-NEXT: retq
1004;
1005; X64-AVX2-LABEL: length64_eq_const:
1006; X64-AVX2: # %bb.0:
1007; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
1008; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
1009; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
1010; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
1011; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1012; X64-AVX2-NEXT: vptest %ymm0, %ymm0
1013; X64-AVX2-NEXT: sete %al
1014; X64-AVX2-NEXT: vzeroupper
1015; X64-AVX2-NEXT: retq
1016 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
1017 %c = icmp eq i32 %m, 0
1018 ret i1 %c
1019}
1020
; bcmp(X, Y, 2): expands the same way as memcmp length2 (bswapped 16-bit subtract).
1021define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 {
1022; X86-LABEL: bcmp_length2:
1023; X86: # %bb.0:
1024; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1025; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1026; X86-NEXT: movzwl (%ecx), %ecx
1027; X86-NEXT: movzwl (%eax), %edx
1028; X86-NEXT: rolw $8, %cx
1029; X86-NEXT: rolw $8, %dx
1030; X86-NEXT: movzwl %cx, %eax
1031; X86-NEXT: movzwl %dx, %ecx
1032; X86-NEXT: subl %ecx, %eax
1033; X86-NEXT: retl
1034;
1035; X64-LABEL: bcmp_length2:
1036; X64: # %bb.0:
1037; X64-NEXT: movzwl (%rdi), %eax
1038; X64-NEXT: movzwl (%rsi), %ecx
1039; X64-NEXT: rolw $8, %ax
1040; X64-NEXT: rolw $8, %cx
1041; X64-NEXT: movzwl %ax, %eax
1042; X64-NEXT: movzwl %cx, %ecx
1043; X64-NEXT: subl %ecx, %eax
1044; X64-NEXT: retq
1045 %m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind
1046 ret i32 %m
1047}
1048
; Module-level profile summary; every function above is tagged !prof !14.
; NOTE(review): !14 gives an entry count of 0, i.e. the functions are profile-cold —
; presumably this exercises the size-optimized (PGSO) memcmp lowering paths; confirm
; against the non-profile variant of this test.
1049!llvm.module.flags = !{!0}
1050!0 = !{i32 1, !"ProfileSummary", !1}
1051!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
1052!2 = !{!"ProfileFormat", !"InstrProf"}
1053!3 = !{!"TotalCount", i64 10000}
1054!4 = !{!"MaxCount", i64 10}
1055!5 = !{!"MaxInternalCount", i64 1}
1056!6 = !{!"MaxFunctionCount", i64 1000}
1057!7 = !{!"NumCounts", i64 3}
1058!8 = !{!"NumFunctions", i64 3}
1059!9 = !{!"DetailedSummary", !10}
1060!10 = !{!11, !12, !13}
1061!11 = !{i32 10000, i64 100, i32 1}
1062!12 = !{i32 999000, i64 100, i32 1}
1063!13 = !{i32 999999, i64 1, i32 2}
1064!14 = !{!"function_entry_count", i64 0}