; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2

; This tests codegen time inlining/optimization of memcmp
; rdar://6480398

; 65-byte constant string used as the fixed right-hand side in the *_eq_const tests.
@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1

declare i32 @memcmp(i8*, i8*, i64)

; memcmp(X, Y, 2) expands inline: byte-swapped 16-bit loads compared via sub.
define i32 @length2(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: length2:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  ret i32 %m
}

; Equality-only memcmp of 2 bytes folds to a single 16-bit compare.
define i1 @length2_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: cmpw (%eax), %cx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; 2-byte equality against a constant folds to compare-with-immediate (0x3231 = "12").
define i1 @length2_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length2_eq_const:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq_const:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; The nobuiltin attribute must suppress inlining: a real libcall is emitted.
define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2_eq_nobuiltin_attr:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $2
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length2_eq_nobuiltin_attr:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $2, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind nobuiltin
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; memcmp of 3 bytes: 2-byte big-endian compare, then a 1-byte tail compare.
define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length3:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx
; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB4_3
; X86-NEXT: .LBB4_1: # %res_block
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: incl %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: decl %eax
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: cmovael %ecx, %eax
; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length3:
; X64: # BB#0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_1: # %res_block
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  ret i32 %m
}

; Equality-only memcmp of 3 bytes: no bswap needed, just 2-byte + 1-byte compares.
define i1 @length3_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length3_eq:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: cmpw (%ecx), %dx
; X86-NEXT: jne .LBB5_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movb 2(%eax), %dl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpb 2(%ecx), %dl
; X86-NEXT: je .LBB5_3
; X86-NEXT: .LBB5_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB5_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length3_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: jne .LBB5_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movb 2(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 2(%rsi), %cl
; X64-NEXT: je .LBB5_3
; X64-NEXT: .LBB5_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB5_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; memcmp of 4 bytes: bswapped 32-bit compare, result built with seta/sbb.
define i32 @length4(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: length4:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  ret i32 %m
}

; Equality-only memcmp of 4 bytes folds to a single 32-bit compare.
define i1 @length4_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4_eq:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: cmpl (%eax), %ecx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length4_eq:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; 4-byte equality against a constant folds to a compare-with-immediate ("1234").
define i1 @length4_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length4_eq_const:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length4_eq_const:
; X64: # BB#0:
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; memcmp of 5 bytes: bswapped 32-bit compare, then a 1-byte tail compare.
define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length5:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx
; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB9_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1: # %res_block
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: incl %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: decl %eax
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: cmovael %ecx, %eax
; X86-NEXT: .LBB9_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length5:
; X64: # BB#0: # %loadbb
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_1: # %res_block
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  ret i32 %m
}

; Equality-only memcmp of 5 bytes: 4-byte + 1-byte compares, no bswap.
define i1 @length5_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length5_eq:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: cmpl (%ecx), %edx
; X86-NEXT: jne .LBB10_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movb 4(%eax), %dl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpb 4(%ecx), %dl
; X86-NEXT: je .LBB10_3
; X86-NEXT: .LBB10_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB10_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length5_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: jne .LBB10_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movb 4(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 4(%rsi), %cl
; X64-NEXT: je .LBB10_3
; X64-NEXT: .LBB10_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB10_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; memcmp of 8 bytes: two 32-bit chunks on x86-32, one 64-bit compare on x86-64.
define i32 @length8(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length8:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB11_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB11_3
; X86-NEXT: .LBB11_1: # %res_block
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: incl %esi
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: decl %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: cmovael %esi, %eax
; X86-NEXT: .LBB11_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: length8:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  ret i32 %m
}

; Equality-only memcmp of 8 bytes: single 64-bit compare on x86-64.
define i1 @length8_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length8_eq:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: cmpl (%ecx), %edx
; X86-NEXT: jne .LBB12_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl 4(%ecx), %edx
; X86-NEXT: je .LBB12_3
; X86-NEXT: .LBB12_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB12_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length8_eq:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; 8-byte equality against a constant: one movabsq+cmpq on x86-64.
define i1 @length8_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length8_eq_const:
; X86: # BB#0: # %loadbb
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $858927408, (%ecx) # imm = 0x33323130
; X86-NEXT: jne .LBB13_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $926299444, 4(%ecx) # imm = 0x37363534
; X86-NEXT: je .LBB13_3
; X86-NEXT: .LBB13_1: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB13_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length8_eq_const:
; X64: # BB#0:
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; 12-byte equality: libcall on x86-32, inline 8+4 byte compares on x86-64.
define i1 @length12_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length12_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length12_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: jne .LBB14_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl 8(%rsi), %ecx
; X64-NEXT: je .LBB14_3
; X64-NEXT: .LBB14_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB14_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; Ordered memcmp of 12 bytes: libcall on x86-32, inline 8+4 chunks on x86-64.
define i32 @length12(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length12:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length12:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_1
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB15_1: # %res_block
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  ret i32 %m
}

; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329

; Ordered memcmp of 16 bytes: libcall on x86-32, two 64-bit chunks on x86-64.
define i32 @length16(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length16:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $16
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length16:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_1
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB16_1: # %res_block
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
  ret i32 %m
}

; 16-byte equality: SSE2 uses pcmpeqb/pmovmskb; x86-64 uses two 64-bit compares.
define i1 @length16_eq(i8* %x, i8* %y) nounwind optsize {
; X86-NOSSE-LABEL: length16_eq:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: setne %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length16_eq:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-LABEL: length16_eq:
; X64: # BB#0: # %loadbb
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: jne .LBB17_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq 8(%rsi), %rcx
; X64-NEXT: je .LBB17_3
; X64-NEXT: .LBB17_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB17_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

; 16-byte equality against a constant: SSE2 compares against a constant-pool vector.
define i1 @length16_eq_const(i8* %X) nounwind optsize {
; X86-NOSSE-LABEL: length16_eq_const:
; X86-NOSSE: # BB#0:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl $.L.str
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: length16_eq_const:
; X86-SSE2: # BB#0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-LABEL: length16_eq_const:
; X64: # BB#0: # %loadbb
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: jne .LBB18_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movabsq $3833745473465760056, %rcx # imm = 0x3534333231303938
; X64-NEXT: cmpq %rcx, 8(%rdi)
; X64-NEXT: je .LBB18_3
; X64-NEXT: .LBB18_1: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB18_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914

; 24 bytes exceeds the inline expansion limit: plain (tail-)call to memcmp.
define i32 @length24(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length24:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length24:
; X64: # BB#0:
; X64-NEXT: movl $24, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
  ret i32 %m
}

; 24-byte equality still goes through the libcall at optsize.
define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length24_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length24_eq:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $24, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

; 24-byte equality against a constant: libcall with the constant string address.
define i1 @length24_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length24_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $24
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length24_eq_const:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $.L.str, %esi
; X64-NEXT: movl $24, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; Ordered memcmp of 32 bytes: always a libcall (tail call on x86-64).
define i32 @length32(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length32:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $32
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length32:
; X64: # BB#0:
; X64-NEXT: movl $32, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
  ret i32 %m
}

; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325

; 32-byte equality: AVX2 can do it inline with one 256-bit vpcmpeqb/vpmovmskb.
define i1 @length32_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length32_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $32
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $32, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

; 32-byte equality against a constant: AVX2 compares against a RIP-relative vector.
define i1 @length32_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length32_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $32
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # BB#0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $.L.str, %esi
; X64-SSE2-NEXT: movl $32, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq_const:
; X64-AVX2: # BB#0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

; Ordered memcmp of 64 bytes: always a libcall (tail call on x86-64).
define i32 @length64(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length64:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
; X64-LABEL: length64:
; X64: # BB#0:
; X64-NEXT: movl $64, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
  ret i32 %m
}

; 64-byte equality: beyond vector inline limits here, so a libcall on all targets.
define i1 @length64_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length64_eq:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length64_eq:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $64, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

; 64-byte equality against a constant: libcall with the constant string address.
define i1 @length64_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length64_eq_const:
; X86: # BB#0:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: length64_eq_const:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $.L.str, %esi
; X64-NEXT: movl $64, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}
919