[x86] use more shift or LEA for select-of-constants (2nd try)

The previous rev (r310208) failed to account for overflow when subtracting the
constants to see if they're suitable for shift/lea. This version adds a check
for that, and more tests were added in r310490.

We can convert any select-of-constants to math ops:
http://rise4fun.com/Alive/d7d
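
As a rough sketch of the idea (hypothetical constants 11 and 3, not taken from this
patch): the select becomes a setcc that is zero-extended, multiplied by the difference
of the constants (a power of 2 here, so the multiply is really a shift), and then
offset by the other constant. Assuming SysV arguments in %edi/%esi:

  # i32 %r = (%edi slt %esi) ? 11 : 3
  xorl  %eax, %eax
  cmpl  %esi, %edi
  setl  %al                # %eax = cond ? 1 : 0
  shll  $3, %eax           # * (11 - 3)
  addl  $3, %eax           # + 3
  retq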

For this patch, I'm enhancing an existing x86 transform that uses fake multiplies
(they always become shl/lea) to avoid cmov or branching. The current code misses
cases where we have a negative constant and a positive constant, so this is just
trying to plug that hole.
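
For example, a select of +1 or -1 (the memcmp case in the diffs below) has a constant
difference of 2, so it can be done with a setcc and an LEA instead of materializing
both constants for a cmov; a minimal sketch, again assuming %edi/%esi arguments:

  # i32 %r = (%edi uge %esi) ? 1 : -1
  xorl  %eax, %eax
  cmpl  %esi, %edi
  setae %al                     # %eax = cond ? 1 : 0
  leal  -1(%rax,%rax), %eax     # 2 * %eax - 1  ->  1 or -1
  retq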

The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start
with a select in IR, create a select DAG node, convert it into a sext, convert it
back into a select, and then lower it to sext machine code.

Some notes about the test diffs:

1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR.
2. memcmp.ll - Choosing -1 or 1 is the case that got me looking at this again. We could avoid the 
   push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a 
   post-DAG problem though.
3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if
   that's a regression, but those would always be nearly equivalent.
4. pr22338.ll and sext-i1.ll - These tests have undef operands, so we don't actually care about these diffs.
5. sbb.ll - This shows a win for what is likely a common case: choosing -1 or 0 (see the sketch after this list).
6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again.
7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops.
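
For item 5, the -1-or-0 case can come straight out of the carry flag, which is
presumably why sbb wins there; a minimal sketch (hypothetical unsigned compare of
%edi and %esi):

  # i32 %r = (%edi ult %esi) ? -1 : 0
  cmpl  %esi, %edi
  sbbl  %eax, %eax         # %eax = %eax - %eax - CF = CF ? -1 : 0
  retq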

Whether the final assembly uses a movzbl or an xor to avoid a partial register stall is decided later, by the X86 Fixup SetCC pass.
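
Both forms show up in the diffs below; roughly, the zero-extension of the setcc result
is done either with an xor ahead of the compare or with a movzbl after it:

  # xor form: clear the register before the flags-setting compare
  xorl   %eax, %eax
  cmpl   %esi, %edi
  setae  %al
  # movzbl form: extend the setcc result afterwards
  cmpl   %esi, %edi
  setae  %al
  movzbl %al, %eax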

Differential Revision: https://reviews.llvm.org/D35340

llvm-svn: 310717
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
index 80d5149..4489aaf 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -125,12 +125,9 @@
 ; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    jmp .LBB4_3
 ; X86-NEXT:  .LBB4_1: # %res_block
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    incl %ecx
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    decl %eax
-; X86-NEXT:    cmpw %si, %dx
-; X86-NEXT:    cmovael %ecx, %eax
+; X86-NEXT:    setae %al
+; X86-NEXT:    movzbl %al, %eax
+; X86-NEXT:    leal -1(%eax,%eax), %eax
 ; X86-NEXT:  .LBB4_3: # %endblock
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
@@ -149,9 +146,9 @@
 ; X64-NEXT:    subl %ecx, %eax
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB4_1: # %res_block
-; X64-NEXT:    movl $-1, %ecx
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovbl %ecx, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    leal -1(%rax,%rax), %eax
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
   ret i32 %m
@@ -286,12 +283,9 @@
 ; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    jmp .LBB9_3
 ; X86-NEXT:  .LBB9_1: # %res_block
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    incl %ecx
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    decl %eax
-; X86-NEXT:    cmpl %esi, %edx
-; X86-NEXT:    cmovael %ecx, %eax
+; X86-NEXT:    setae %al
+; X86-NEXT:    movzbl %al, %eax
+; X86-NEXT:    leal -1(%eax,%eax), %eax
 ; X86-NEXT:  .LBB9_3: # %endblock
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
@@ -310,9 +304,9 @@
 ; X64-NEXT:    subl %ecx, %eax
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB9_1: # %res_block
-; X64-NEXT:    movl $-1, %ecx
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovbl %ecx, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    leal -1(%rax,%rax), %eax
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
   ret i32 %m
@@ -381,12 +375,10 @@
 ; X86-NEXT:    cmpl %edx, %ecx
 ; X86-NEXT:    je .LBB11_3
 ; X86-NEXT:  .LBB11_1: # %res_block
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    incl %esi
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    decl %eax
 ; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    cmovael %esi, %eax
+; X86-NEXT:    setae %al
+; X86-NEXT:    leal -1(%eax,%eax), %eax
 ; X86-NEXT:  .LBB11_3: # %endblock
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
@@ -531,10 +523,10 @@
 ; X64-NEXT:  # BB#3: # %endblock
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB15_1: # %res_block
+; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    movl $-1, %ecx
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovbl %ecx, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    leal -1(%rax,%rax), %eax
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
   ret i32 %m
@@ -572,10 +564,10 @@
 ; X64-NEXT:  # BB#3: # %endblock
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB16_1: # %res_block
+; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    movl $-1, %ecx
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovbl %ecx, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    leal -1(%rax,%rax), %eax
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
   ret i32 %m