; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2

; This tests codegen time inlining/optimization of memcmp
; rdar://6480398

@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1

declare i32 @memcmp(i8*, i8*, i64)

define i32 @length2(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: length2:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  ret i32 %m
}

define i1 @length2_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: cmpw (%eax), %cx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i1 @length2_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length2_eq_const:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq_const:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2_eq_nobuiltin_attr:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $2
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq_nobuiltin_attr:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $2, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind nobuiltin
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length3:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx
; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB4_3
; X86-NEXT: .LBB4_1: # %res_block
; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length3:
; X64: # BB#0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_1: # %res_block
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  ret i32 %m
}

define i1 @length3_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length3_eq:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: cmpw (%ecx), %dx
; X86-NEXT: jne .LBB5_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movb 2(%eax), %dl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpb 2(%ecx), %dl
; X86-NEXT: je .LBB5_3
; X86-NEXT: .LBB5_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB5_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length3_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: jne .LBB5_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movb 2(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 2(%rsi), %cl
; X64-NEXT: je .LBB5_3
; X64-NEXT: .LBB5_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB5_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length4(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: length4:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  ret i32 %m
}

define i1 @length4_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: cmpl (%eax), %ecx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length4_eq:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length4_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length4_eq_const:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length4_eq_const:
; X64: # BB#0:
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length5:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx
; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB9_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1: # %res_block
; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB9_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length5:
; X64: # BB#0: # %loadbb
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_1: # %res_block
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  ret i32 %m
}

define i1 @length5_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length5_eq:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: cmpl (%ecx), %edx
; X86-NEXT: jne .LBB10_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movb 4(%eax), %dl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpb 4(%ecx), %dl
; X86-NEXT: je .LBB10_3
; X86-NEXT: .LBB10_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB10_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length5_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: jne .LBB10_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movb 4(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 4(%rsi), %cl
; X64-NEXT: je .LBB10_3
; X64-NEXT: .LBB10_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB10_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length8(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length8:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB11_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB11_3
; X86-NEXT: .LBB11_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB11_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length8:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  ret i32 %m
}

define i1 @length8_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length8_eq:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: cmpl (%ecx), %edx
; X86-NEXT: jne .LBB12_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl 4(%ecx), %edx
; X86-NEXT: je .LBB12_3
; X86-NEXT: .LBB12_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB12_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length8_eq:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i1 @length8_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length8_eq_const:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $858927408, (%ecx) # imm = 0x33323130
; X86-NEXT: jne .LBB13_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $926299444, 4(%ecx) # imm = 0x37363534
; X86-NEXT: je .LBB13_3
; X86-NEXT: .LBB13_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB13_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length8_eq_const:
; X64: # BB#0:
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length12_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length12_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length12_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: jne .LBB14_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl 8(%rsi), %ecx
; X64-NEXT: je .LBB14_3
; X64-NEXT: .LBB14_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB14_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length12(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length12:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length12:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_1
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB15_1: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  ret i32 %m
}

; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329

define i32 @length16(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length16:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $16
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length16:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_1
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB16_1: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
  ret i32 %m
}

define i1 @length16_eq(i8* %x, i8* %y) nounwind optsize {
; X86-NOSSE-LABEL: length16_eq:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: setne %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length16_eq:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-LABEL: length16_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: jne .LBB17_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq 8(%rsi), %rcx
; X64-NEXT: je .LBB17_3
; X64-NEXT: .LBB17_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB17_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

define i1 @length16_eq_const(i8* %X) nounwind optsize {
; X86-NOSSE-LABEL: length16_eq_const:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl $.L.str
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length16_eq_const:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-LABEL: length16_eq_const:
; X64: # BB#0: # %loadbb
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: jne .LBB18_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movabsq $3833745473465760056, %rcx # imm = 0x3534333231303938
; X64-NEXT: cmpq %rcx, 8(%rdi)
; X64-NEXT: je .LBB18_3
; X64-NEXT: .LBB18_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB18_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914

define i32 @length24(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length24:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length24:
; X64: # BB#0:
; X64-NEXT: movl $24, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
  ret i32 %m
}

define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length24_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length24_eq:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $24, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length24_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length24_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length24_eq_const:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $.L.str, %esi
; X64-NEXT: movl $24, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length32(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length32:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $32
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length32:
; X64: # BB#0:
; X64-NEXT: movl $32, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
  ret i32 %m
}

; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325

define i1 @length32_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length32_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $32
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $32, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length32_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length32_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $32
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $.L.str, %esi
; X64-SSE2-NEXT: movl $32, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq_const:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length64(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length64:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length64:
; X64: # BB#0:
; X64-NEXT: movl $64, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
  ret i32 %m
}

define i1 @length64_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length64_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length64_eq:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $64, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

define i1 @length64_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length64_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length64_eq_const:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $.L.str, %esi
; X64-NEXT: movl $64, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}