handle equality memcmp of 8 bytes on x86-64 with two unaligned loads and a
compare. On other targets we end up with a call to memcmp because we don't
want 16 individual byte loads. We should be able to use movups as well, but
we're failing to select the generated icmp.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92107 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index f6086c4..b90d2e2 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -3,7 +3,7 @@
; This tests codegen time inlining/optimization of memcmp
; rdar://6480398
-@.str = private constant [6 x i8] c"fooxx\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str = private constant [23 x i8] c"fooooooooooooooooooooo\00", align 1 ; <[23 x i8]*> [#uses=1]
declare i32 @memcmp(...)
@@ -26,7 +26,7 @@
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -60,7 +60,7 @@
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -71,6 +71,40 @@
return: ; preds = %entry
ret void
; CHECK: memcmp4a:
-; CHECK: cmpl $2021158767, (%rdi)
+; CHECK: cmpl $1869573999, (%rdi)
+}
+
+define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+entry:
+ %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %return, label %bb
+
+bb: ; preds = %entry
+ store i32 4, i32* %P, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+; CHECK: memcmp8:
+; CHECK: movq (%rsi), %rax
+; CHECK: cmpq %rax, (%rdi)
+}
+
+define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
+entry:
+ %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %return, label %bb
+
+bb: ; preds = %entry
+ store i32 4, i32* %P, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+; CHECK: memcmp8a:
+; CHECK: movabsq $8029759185026510694, %rax
+; CHECK: cmpq %rax, (%rdi)
}