; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2

; This tests codegen time inlining/optimization of memcmp
; rdar://6480398
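;
; A quick summary of the lowering these checks exercise (derived from the checks
; themselves): small constant sizes are expanded inline into wide loads, bswaps,
; and scalar compares; equality-only comparisons (result tested against 0) skip
; the bswap and can use SSE2/AVX2 vector compares; sizes beyond the expansion
; limit remain calls to the memcmp library function.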

@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1

declare i32 @memcmp(i8*, i8*, i64)

define i32 @length2(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: length2:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  ret i32 %m
}

define i1 @length2_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: cmpw (%eax), %cx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i1 @length2_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length2_eq_const:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq_const:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2_eq_nobuiltin_attr:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $2
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq_nobuiltin_attr:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $2, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind nobuiltin
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length3:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx
; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB4_3
; X86-NEXT: .LBB4_1: # %res_block
; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length3:
; X64: # BB#0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_1: # %res_block
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  ret i32 %m
}

define i1 @length3_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length3_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %edx
; X86-NEXT: cmpw (%eax), %dx
; X86-NEXT: jne .LBB5_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: movb 2(%ecx), %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb 2(%eax), %dl
; X86-NEXT: je .LBB5_3
; X86-NEXT: .LBB5_2: # %res_block
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: incl %ecx
; X86-NEXT: .LBB5_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length3_eq:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: jne .LBB5_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movb 2(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 2(%rsi), %cl
; X64-NEXT: je .LBB5_3
; X64-NEXT: .LBB5_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB5_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length4(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: length4:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  ret i32 %m
}

define i1 @length4_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: cmpl (%eax), %ecx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length4_eq:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length4_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length4_eq_const:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length4_eq_const:
; X64: # BB#0:
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length5:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx
; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB9_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1: # %res_block
; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB9_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length5:
; X64: # BB#0: # %loadbb
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_1: # %res_block
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  ret i32 %m
}

define i1 @length5_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length5_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: cmpl (%eax), %edx
; X86-NEXT: jne .LBB10_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: movb 4(%ecx), %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb 4(%eax), %dl
; X86-NEXT: je .LBB10_3
; X86-NEXT: .LBB10_2: # %res_block
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: incl %ecx
; X86-NEXT: .LBB10_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length5_eq:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: jne .LBB10_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movb 4(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 4(%rsi), %cl
; X64-NEXT: je .LBB10_3
; X64-NEXT: .LBB10_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB10_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length8(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length8:
; X86: # BB#0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB11_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB11_3
; X86-NEXT: .LBB11_2: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB11_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length8:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  ret i32 %m
}

define i1 @length8_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length8_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: cmpl (%eax), %edx
; X86-NEXT: jne .LBB12_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl 4(%eax), %edx
; X86-NEXT: je .LBB12_3
; X86-NEXT: .LBB12_2: # %res_block
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: incl %ecx
; X86-NEXT: .LBB12_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length8_eq:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i1 @length8_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length8_eq_const:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $858927408, (%ecx) # imm = 0x33323130
; X86-NEXT: jne .LBB13_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $926299444, 4(%ecx) # imm = 0x37363534
; X86-NEXT: je .LBB13_3
; X86-NEXT: .LBB13_2: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB13_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length8_eq_const:
; X64: # BB#0:
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length12_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length12_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length12_eq:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: jne .LBB14_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl 8(%rsi), %ecx
; X64-NEXT: je .LBB14_3
; X64-NEXT: .LBB14_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB14_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length12(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length12:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length12:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3
; X64-NEXT: .LBB15_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB15_3: # %endblock
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  ret i32 %m
}

; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329

define i32 @length16(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length16:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $16
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length16:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB16_3
; X64-NEXT: .LBB16_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB16_3: # %endblock
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
  ret i32 %m
}

define i1 @length16_eq(i8* %x, i8* %y) nounwind optsize {
; X86-NOSSE-LABEL: length16_eq:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: setne %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length16_eq:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length16_eq:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length16_eq:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

define i1 @length16_eq_const(i8* %X) nounwind optsize {
; X86-NOSSE-LABEL: length16_eq_const:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl $.L.str
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length16_eq_const:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length16_eq_const:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length16_eq_const:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914

define i32 @length24(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length24:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length24:
; X64: # BB#0:
; X64-NEXT: movl $24, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
  ret i32 %m
}

define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length24_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length24_eq:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB20_2
; X64-SSE2-NEXT: # BB#1: # %loadbb1
; X64-SSE2-NEXT: movq 16(%rdi), %rcx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpq 16(%rsi), %rcx
; X64-SSE2-NEXT: je .LBB20_3
; X64-SSE2-NEXT: .LBB20_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB20_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length24_eq:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: jne .LBB20_2
; X64-AVX2-NEXT: # BB#1: # %loadbb1
; X64-AVX2-NEXT: movq 16(%rdi), %rcx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpq 16(%rsi), %rcx
; X64-AVX2-NEXT: je .LBB20_3
; X64-AVX2-NEXT: .LBB20_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB20_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length24_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length24_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB21_2
; X64-SSE2-NEXT: # BB#1: # %loadbb1
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-SSE2-NEXT: cmpq %rcx, 16(%rdi)
; X64-SSE2-NEXT: je .LBB21_3
; X64-SSE2-NEXT: .LBB21_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB21_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length24_eq_const:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: jne .LBB21_2
; X64-AVX2-NEXT: # BB#1: # %loadbb1
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-AVX2-NEXT: cmpq %rcx, 16(%rdi)
; X64-AVX2-NEXT: je .LBB21_3
; X64-AVX2-NEXT: .LBB21_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB21_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length32(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length32:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $32
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length32:
; X64: # BB#0:
; X64-NEXT: movl $32, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
  ret i32 %m
}

; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325

define i1 @length32_eq(i8* %x, i8* %y) nounwind optsize {
; X86-NOSSE-LABEL: length32_eq:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $32
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length32_eq:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %edx
; X86-SSE2-NEXT: cmpl $65535, %edx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB23_2
; X86-SSE2-NEXT: # BB#1: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm0
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB23_3
; X86-SSE2-NEXT: .LBB23_2: # %res_block
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: incl %eax
; X86-SSE2-NEXT: .LBB23_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB23_2
; X64-SSE2-NEXT: # BB#1: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB23_3
; X64-SSE2-NEXT: .LBB23_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB23_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length32_eq_const(i8* %X) nounwind optsize {
; X86-NOSSE-LABEL: length32_eq_const:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $32
; X86-NOSSE-NEXT: pushl $.L.str
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: setne %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length32_eq_const:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB24_2
; X86-SSE2-NEXT: # BB#1: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB24_3
; X86-SSE2-NEXT: .LBB24_2: # %res_block
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: incl %eax
; X86-SSE2-NEXT: .LBB24_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB24_2
; X64-SSE2-NEXT: # BB#1: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB24_3
; X64-SSE2-NEXT: .LBB24_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB24_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq_const:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length64(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length64:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length64:
; X64: # BB#0:
; X64-NEXT: movl $64, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
  ret i32 %m
}

define i1 @length64_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length64_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length64_eq:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length64_eq:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB26_2
; X64-AVX2-NEXT: # BB#1: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB26_3
; X64-AVX2-NEXT: .LBB26_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB26_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

define i1 @length64_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length64_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length64_eq_const:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $.L.str, %esi
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length64_eq_const:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB27_2
; X64-AVX2-NEXT: # BB#1: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB27_3
; X64-AVX2-NEXT: .LBB27_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB27_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}
