; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2

; This tests codegen time inlining/optimization of memcmp
; rdar://6480398

; Constant source buffer for the *_eq_const tests: 64 ASCII digit characters
; plus the trailing NUL (65 bytes total).
@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1

declare i32 @memcmp(i8*, i8*, i64)

; memcmp(X, Y, 2): expected inline expansion is recorded in the CHECK lines below.
define i32 @length2(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: length2:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  ret i32 %m
}

; Equality-only form of the 2-byte memcmp (result compared against 0).
define i1 @length2_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: cmpw (%eax), %cx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; 2-byte equality compare against a constant string (offset 1 into @.str).
define i1 @length2_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length2_eq_const:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq_const:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; With the nobuiltin attribute on the call, no inline expansion: a libcall
; to memcmp is expected instead.
define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2_eq_nobuiltin_attr:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $2
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq_nobuiltin_attr:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $2, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind nobuiltin
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; memcmp(X, Y, 3): expands to a 2-byte compare plus a 1-byte tail compare.
define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length3:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx
; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB4_3
; X86-NEXT: .LBB4_1: # %res_block
; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length3:
; X64: # BB#0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_1: # %res_block
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  ret i32 %m
}

; Equality-only form of the 3-byte memcmp (no byte-swaps needed).
define i1 @length3_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length3_eq:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: cmpw (%ecx), %dx
; X86-NEXT: jne .LBB5_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movb 2(%eax), %dl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpb 2(%ecx), %dl
; X86-NEXT: je .LBB5_3
; X86-NEXT: .LBB5_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB5_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length3_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: jne .LBB5_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movb 2(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 2(%rsi), %cl
; X64-NEXT: je .LBB5_3
; X64-NEXT: .LBB5_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB5_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; memcmp(X, Y, 4): single 32-bit bswap'd compare, result built from flags.
define i32 @length4(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: length4:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  ret i32 %m
}

; Equality-only form of the 4-byte memcmp: one 32-bit compare.
define i1 @length4_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: cmpl (%eax), %ecx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length4_eq:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; 4-byte equality compare against a constant: folds to a cmpl with an immediate.
define i1 @length4_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length4_eq_const:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length4_eq_const:
; X64: # BB#0:
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; memcmp(X, Y, 5): 4-byte bswap'd compare plus a 1-byte tail compare.
define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length5:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx
; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB9_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1: # %res_block
; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB9_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length5:
; X64: # BB#0: # %loadbb
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_1: # %res_block
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  ret i32 %m
}

; Equality-only form of the 5-byte memcmp.
define i1 @length5_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length5_eq:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: cmpl (%ecx), %edx
; X86-NEXT: jne .LBB10_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movb 4(%eax), %dl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpb 4(%ecx), %dl
; X86-NEXT: je .LBB10_3
; X86-NEXT: .LBB10_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB10_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length5_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: jne .LBB10_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movb 4(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 4(%rsi), %cl
; X64-NEXT: je .LBB10_3
; X64-NEXT: .LBB10_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB10_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; memcmp(X, Y, 8): two 32-bit compares on x86-32, one 64-bit compare on x86-64.
define i32 @length8(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length8:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB11_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB11_3
; X86-NEXT: .LBB11_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB11_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length8:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  ret i32 %m
}

; Equality-only form of the 8-byte memcmp.
define i1 @length8_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length8_eq:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: cmpl (%ecx), %edx
; X86-NEXT: jne .LBB12_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl 4(%ecx), %edx
; X86-NEXT: je .LBB12_3
; X86-NEXT: .LBB12_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB12_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length8_eq:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; 8-byte equality compare against a constant string.
define i1 @length8_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length8_eq_const:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $858927408, (%ecx) # imm = 0x33323130
; X86-NEXT: jne .LBB13_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $926299444, 4(%ecx) # imm = 0x37363534
; X86-NEXT: je .LBB13_3
; X86-NEXT: .LBB13_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB13_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length8_eq_const:
; X64: # BB#0:
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; 12-byte equality: libcall on x86-32, 8+4 byte inline compares on x86-64.
define i1 @length12_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length12_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length12_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: jne .LBB14_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl 8(%rsi), %ecx
; X64-NEXT: je .LBB14_3
; X64-NEXT: .LBB14_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB14_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; memcmp(X, Y, 12): libcall on x86-32; 8-byte + 4-byte chain on x86-64.
define i32 @length12(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length12:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length12:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_1
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB15_1: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  ret i32 %m
}

; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329

; memcmp(X, Y, 16): libcall on x86-32; two 8-byte compares on x86-64.
define i32 @length16(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length16:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $16
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length16:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_1
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB16_1: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
  ret i32 %m
}

; 16-byte equality: SSE2/AVX2 targets use a vector compare + pmovmskb.
define i1 @length16_eq(i8* %x, i8* %y) nounwind optsize {
; X86-NOSSE-LABEL: length16_eq:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: setne %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length16_eq:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length16_eq:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length16_eq:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

; 16-byte equality against a constant: vector compare with a memory operand.
define i1 @length16_eq_const(i8* %X) nounwind optsize {
; X86-NOSSE-LABEL: length16_eq_const:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl $.L.str
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length16_eq_const:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length16_eq_const:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length16_eq_const:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914

; memcmp(X, Y, 24): not inlined for the full result; x86-64 tail-calls memcmp.
define i32 @length24(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length24:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length24:
; X64: # BB#0:
; X64-NEXT: movl $24, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
  ret i32 %m
}

; 24-byte equality: 16-byte vector compare plus an 8-byte scalar compare on x86-64.
define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length24_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length24_eq:
; X64-SSE2: # BB#0: # %loadbb
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB20_1
; X64-SSE2-NEXT: # BB#2: # %loadbb1
; X64-SSE2-NEXT: movq 16(%rdi), %rcx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpq 16(%rsi), %rcx
; X64-SSE2-NEXT: je .LBB20_3
; X64-SSE2-NEXT: .LBB20_1: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB20_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length24_eq:
; X64-AVX2: # BB#0: # %loadbb
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: jne .LBB20_1
; X64-AVX2-NEXT: # BB#2: # %loadbb1
; X64-AVX2-NEXT: movq 16(%rdi), %rcx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpq 16(%rsi), %rcx
; X64-AVX2-NEXT: je .LBB20_3
; X64-AVX2-NEXT: .LBB20_1: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB20_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

; 24-byte equality against a constant: vector compare plus movabsq/cmpq tail.
define i1 @length24_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length24_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2: # BB#0: # %loadbb
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB21_1
; X64-SSE2-NEXT: # BB#2: # %loadbb1
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-SSE2-NEXT: cmpq %rcx, 16(%rdi)
; X64-SSE2-NEXT: je .LBB21_3
; X64-SSE2-NEXT: .LBB21_1: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB21_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length24_eq_const:
; X64-AVX2: # BB#0: # %loadbb
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: jne .LBB21_1
; X64-AVX2-NEXT: # BB#2: # %loadbb1
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-AVX2-NEXT: cmpq %rcx, 16(%rdi)
; X64-AVX2-NEXT: je .LBB21_3
; X64-AVX2-NEXT: .LBB21_1: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB21_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; memcmp(X, Y, 32): not inlined for the full result; x86-64 tail-calls memcmp.
define i32 @length32(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length32:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $32
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length32:
; X64: # BB#0:
; X64-NEXT: movl $32, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
  ret i32 %m
}

; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325

; 32-byte equality: two 16-byte vector compares with SSE2; a single 32-byte
; ymm compare (plus vzeroupper) with AVX2.
define i1 @length32_eq(i8* %x, i8* %y) nounwind optsize {
; X86-NOSSE-LABEL: length32_eq:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $32
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length32_eq:
; X86-SSE2: # BB#0: # %loadbb
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %edx
; X86-SSE2-NEXT: cmpl $65535, %edx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB23_1
; X86-SSE2-NEXT: # BB#2: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm0
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB23_3
; X86-SSE2-NEXT: .LBB23_1: # %res_block
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: incl %eax
; X86-SSE2-NEXT: .LBB23_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # BB#0: # %loadbb
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB23_1
; X64-SSE2-NEXT: # BB#2: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB23_3
; X64-SSE2-NEXT: .LBB23_1: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB23_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length32_eq_const(i8* %X) nounwind optsize {
; Compare 32 bytes against the constant @.str and test for inequality.
; Without SSE2 this stays a libcall; with SSE2/AVX2 it is expanded inline.
; X86-NOSSE-LABEL: length32_eq_const:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $32
; X86-NOSSE-NEXT: pushl $.L.str
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: setne %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length32_eq_const:
; X86-SSE2: # BB#0: # %loadbb
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB24_1
; X86-SSE2-NEXT: # BB#2: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB24_3
; X86-SSE2-NEXT: .LBB24_1: # %res_block
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: incl %eax
; X86-SSE2-NEXT: .LBB24_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # BB#0: # %loadbb
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB24_1
; X64-SSE2-NEXT: # BB#2: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB24_3
; X64-SSE2-NEXT: .LBB24_1: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB24_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq_const:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length64(i8* %X, i8* %Y) nounwind optsize {
; 64 bytes is above the inline-expansion threshold here, so the raw i32
; result is obtained via a plain call (x86-64 emits a tail call).
; X86-LABEL: length64:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length64:
; X64: # BB#0:
; X64-NEXT: movl $64, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
  ret i32 %m
}

define i1 @length64_eq(i8* %x, i8* %y) nounwind optsize {
; 64-byte equality test: only AVX2 (two 32-byte vector compares) expands
; this inline; the other configurations fall back to a memcmp libcall.
; X86-LABEL: length64_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length64_eq:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length64_eq:
; X64-AVX2: # BB#0: # %loadbb
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB26_1
; X64-AVX2-NEXT: # BB#2: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB26_3
; X64-AVX2-NEXT: .LBB26_1: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB26_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

define i1 @length64_eq_const(i8* %X) nounwind optsize {
; 64-byte equality against the constant @.str: AVX2 expands inline with
; two 32-byte vector compares; the other configurations use a libcall.
; X86-LABEL: length64_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length64_eq_const:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $.L.str, %esi
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length64_eq_const:
; X64-AVX2: # BB#0: # %loadbb
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB27_1
; X64-AVX2-NEXT: # BB#2: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB27_3
; X64-AVX2-NEXT: .LBB27_1: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB27_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}
