[CGP] use subtract or subtract-of-cmps for result of memcmp expansion

As noted in the code comment, transforming this in the other direction might require
a separate transform here in CGP because the DAG only sees one block at a time.

Besides that theoretical motivation, there are two practical motivations for the
subtract-of-cmps form (sketched in IR after this list):

1. The codegen for both x86 and PPC is better for this IR (though PPC could be better still). 
   There is discussion about canonicalizing IR to the select form
   (http://lists.llvm.org/pipermail/llvm-dev/2017-July/114885.html),
   so we probably need to add DAG transforms for those patterns anyway, but this improves the 
   memcmp output without waiting for that step.

2. If we allow vector-sized chunks for the load and compare, x86 is better prepared to convert
   that to optimal code when using subtract-of-cmps, so another prerequisite patch is avoided
   if we choose to enable that.
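
For reference, here is a rough sketch of the two forms on already-loaded, endian-adjusted
values (the function and value names are illustrative, not the names produced by the
expansion):

  ; subtract-of-cmps form: (a > b) - (a < b) yields -1, 0, or 1 with no selects
  define i32 @subtract_of_cmps(i64 %a, i64 %b) {
    %ugt = icmp ugt i64 %a, %b
    %ult = icmp ult i64 %a, %b
    %zugt = zext i1 %ugt to i32
    %zult = zext i1 %ult to i32
    %r = sub i32 %zugt, %zult
    ret i32 %r
  }

  ; select form: nested selects keyed on eq/lt produce the same -1/0/1 result
  define i32 @select_form(i64 %a, i64 %b) {
    %eq = icmp eq i64 %a, %b
    %lt = icmp ult i64 %a, %b
    %neg_or_pos = select i1 %lt, i32 -1, i32 1
    %r = select i1 %eq, i32 0, i32 %neg_or_pos
    ret i32 %r
  }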

Differential Revision: https://reviews.llvm.org/D34904

llvm-svn: 309597
diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll
index fbaaa8b..36ba525 100644
--- a/llvm/test/CodeGen/PowerPC/memcmp.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp.ll
@@ -6,11 +6,13 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    ldbrx 3, 0, 3
 ; CHECK-NEXT:    ldbrx 4, 0, 4
-; CHECK-NEXT:    li 5, 1
-; CHECK-NEXT:    li 12, -1
+; CHECK-NEXT:    li 5, 0
 ; CHECK-NEXT:    cmpld 3, 4
-; CHECK-NEXT:    isel 3, 12, 5, 0
-; CHECK-NEXT:    isel 3, 0, 3, 2
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    isel 4, 3, 5, 1
+; CHECK-NEXT:    isel 3, 3, 5, 0
+; CHECK-NEXT:    subf 3, 3, 4
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
   %t0 = bitcast i32* %buffer1 to i8*
   %t1 = bitcast i32* %buffer2 to i8*
@@ -23,11 +25,12 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    lwbrx 3, 0, 3
 ; CHECK-NEXT:    lwbrx 4, 0, 4
-; CHECK-NEXT:    li 5, 1
-; CHECK-NEXT:    li 12, -1
-; CHECK-NEXT:    cmplw 3, 4
-; CHECK-NEXT:    isel 3, 12, 5, 0
-; CHECK-NEXT:    isel 3, 0, 3, 2
+; CHECK-NEXT:    sub 5, 4, 3
+; CHECK-NEXT:    sub 3, 3, 4
+; CHECK-NEXT:    rldicl 4, 5, 1, 63
+; CHECK-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-NEXT:    subf 3, 3, 4
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
   %t0 = bitcast i32* %buffer1 to i8*
   %t1 = bitcast i32* %buffer2 to i8*
@@ -40,11 +43,8 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    lhbrx 3, 0, 3
 ; CHECK-NEXT:    lhbrx 4, 0, 4
-; CHECK-NEXT:    li 5, 1
-; CHECK-NEXT:    li 12, -1
-; CHECK-NEXT:    cmplw 3, 4
-; CHECK-NEXT:    isel 3, 12, 5, 0
-; CHECK-NEXT:    isel 3, 0, 3, 2
+; CHECK-NEXT:    subf 3, 4, 3
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
   %t0 = bitcast i32* %buffer1 to i8*
   %t1 = bitcast i32* %buffer2 to i8*
@@ -57,11 +57,8 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    lbz 3, 0(3)
 ; CHECK-NEXT:    lbz 4, 0(4)
-; CHECK-NEXT:    li 5, 1
-; CHECK-NEXT:    li 12, -1
-; CHECK-NEXT:    cmplw 3, 4
-; CHECK-NEXT:    isel 3, 12, 5, 0
-; CHECK-NEXT:    isel 3, 0, 3, 2
+; CHECK-NEXT:    subf 3, 4, 3
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
   %t0 = bitcast i32* %buffer1 to i8*
   %t1 = bitcast i32* %buffer2 to i8*