[DAGCombiner] extend(ifpositive(X)) -> shift-right (not X)
This is almost the same as an existing IR canonicalization in instcombine,
so I'm assuming this is a good early generic DAG combine too.
The motivation comes from reduced bit-hacking for select-of-constants in IR
after rL331486. We want to restore that functionality in the DAG as noted in
the commit comments for that change and the llvm-dev discussion here:
http://lists.llvm.org/pipermail/llvm-dev/2018-July/124433.html
The PPC and AArch64 tests show that those targets are already doing something
similar. x86 will be neutral in the minimal case and generally better when
this pattern is extended with other ops as shown in the signbit-shift.ll tests.
Note the asymmetry: we don't include the (extend (ifneg X)) transform because
it already exists in SimplifySelectCC(), and that is verified in the later
unchanged tests in the signbit-shift.ll files. Without the 'not' op, the
general transform to use a shift is always a win because that's a single
instruction.
Alive proofs:
https://rise4fun.com/Alive/ysli
Name: if pos, get -1
%c = icmp sgt i16 %x, -1
%r = sext i1 %c to i16
=>
%n = xor i16 %x, -1
%r = ashr i16 %n, 15
Name: if pos, get 1
%c = icmp sgt i16 %x, -1
%r = zext i1 %c to i16
=>
%n = xor i16 %x, -1
%r = lshr i16 %n, 15
Differential Revision: https://reviews.llvm.org/D48970
llvm-svn: 337130
diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll
index f9300ad..4d6dff4 100644
--- a/llvm/test/CodeGen/AArch64/signbit-shift.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll
@@ -6,8 +6,8 @@
define i32 @zext_ifpos(i32 %x) {
; CHECK-LABEL: zext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, #31
-; CHECK-NEXT: eor w0, w8, #0x1
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: lsr w0, w8, #31
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
@@ -17,8 +17,8 @@
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, #31
-; CHECK-NEXT: eor w8, w8, #0x1
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: lsr w8, w8, #31
; CHECK-NEXT: add w0, w8, #41 // =41
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
@@ -42,8 +42,8 @@
define i32 @sext_ifpos(i32 %x) {
; CHECK-LABEL: sext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1
-; CHECK-NEXT: eor w0, w8, w0, asr #31
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: asr w0, w8, #31
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
@@ -53,10 +53,9 @@
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, #31
-; CHECK-NEXT: eor w8, w8, #0x1
+; CHECK-NEXT: mvn w8, w0
; CHECK-NEXT: mov w9, #42
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w0, w9, w8, lsr #31
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares.ll b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
index e12a968..20d1ea5 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
@@ -114,7 +114,7 @@
}
; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
-; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}}
+; GCN: v_ashrrev_i32_e32 v2, 31, v2
define amdgpu_kernel void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
index a64e71b..6d7897d 100644
--- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
@@ -17,8 +17,8 @@
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: # %bb.0:
+; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: addi 3, 3, 41
; CHECK-NEXT: blr
%c = icmp sgt i32 %x, -1
@@ -54,8 +54,8 @@
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: srawi 3, 3, 31
; CHECK-NEXT: nor 3, 3, 3
+; CHECK-NEXT: srawi 3, 3, 31
; CHECK-NEXT: addi 3, 3, 42
; CHECK-NEXT: blr
%c = icmp sgt i32 %x, -1
diff --git a/llvm/test/CodeGen/PowerPC/testComparesigesll.ll b/llvm/test/CodeGen/PowerPC/testComparesigesll.ll
index 30efe3d..0926d9e 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigesll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigesll.ll
@@ -99,9 +99,9 @@
; CHECK-LABEL: test_igesll_z_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-NEXT: rldicl r3, r3, 1, 63
+; CHECK-NEXT: not r3, r3
; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
-; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: rldicl r3, r3, 1, 63
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
@@ -115,9 +115,9 @@
; CHECK-LABEL: test_igesll_sext_z_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-NEXT: sradi r3, r3, 63
-; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-NEXT: not r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: sradi r3, r3, 63
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll b/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll
index 6fb5397..f2096c2 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll
@@ -39,8 +39,8 @@
define i64 @test_llgesll_z(i64 %a) {
; CHECK-LABEL: test_llgesll_z:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: not r3, r3
; CHECK-NEXT: rldicl r3, r3, 1, 63
-; CHECK-NEXT: xori r3, r3, 1
; CHECK-NEXT: blr
entry:
%cmp = icmp sgt i64 %a, -1
@@ -51,8 +51,8 @@
define i64 @test_llgesll_sext_z(i64 %a) {
; CHECK-LABEL: test_llgesll_sext_z:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sradi r3, r3, 63
; CHECK-NEXT: not r3, r3
+; CHECK-NEXT: sradi r3, r3, 63
; CHECK-NEXT: blr
entry:
%cmp = icmp sgt i64 %a, -1
@@ -99,9 +99,9 @@
; CHECK-LABEL: test_llgesll_z_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-NEXT: rldicl r3, r3, 1, 63
+; CHECK-NEXT: not r3, r3
; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
-; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: rldicl r3, r3, 1, 63
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
@@ -115,9 +115,9 @@
; CHECK-LABEL: test_llgesll_sext_z_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-NEXT: sradi r3, r3, 63
-; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-NEXT: not r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: sradi r3, r3, 63
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 4fc88d5..7d518ab 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -297,11 +297,10 @@
;
; MCU-LABEL: test7:
; MCU: # %bb.0:
-; MCU-NEXT: xorl %ecx, %ecx
-; MCU-NEXT: testl %eax, %eax
-; MCU-NEXT: setns %cl
-; MCU-NEXT: shll $4, %ecx
-; MCU-NEXT: fldt {{\.LCPI.*}}(%ecx)
+; MCU-NEXT: notl %eax
+; MCU-NEXT: shrl $27, %eax
+; MCU-NEXT: andl $-16, %eax
+; MCU-NEXT: fldt {{\.LCPI.*}}(%eax)
; MCU-NEXT: retl
%tmp9 = icmp sgt i32 %tmp8, -1
%retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000
diff --git a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
index 833ac1d..2c7fb19 100644
--- a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
@@ -93,18 +93,18 @@
define i32 @pos_sel_constants(i32 %a) {
; CHECK-NOBMI-LABEL: pos_sel_constants:
; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: xorl %eax, %eax
-; CHECK-NOBMI-NEXT: testl %edi, %edi
-; CHECK-NOBMI-NEXT: setns %al
-; CHECK-NOBMI-NEXT: leal (%rax,%rax,4), %eax
+; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT: notl %edi
+; CHECK-NOBMI-NEXT: shrl $31, %edi
+; CHECK-NOBMI-NEXT: leal (%rdi,%rdi,4), %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: pos_sel_constants:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %eax, %eax
-; CHECK-BMI-NEXT: testl %edi, %edi
-; CHECK-BMI-NEXT: setns %al
-; CHECK-BMI-NEXT: leal (%rax,%rax,4), %eax
+; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-BMI-NEXT: notl %edi
+; CHECK-BMI-NEXT: shrl $31, %edi
+; CHECK-BMI-NEXT: leal (%rdi,%rdi,4), %eax
; CHECK-BMI-NEXT: retq
%tmp.1 = icmp sgt i32 %a, -1
%retval = select i1 %tmp.1, i32 5, i32 0
@@ -116,18 +116,18 @@
define i32 @pos_sel_special_constant(i32 %a) {
; CHECK-NOBMI-LABEL: pos_sel_special_constant:
; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: xorl %eax, %eax
-; CHECK-NOBMI-NEXT: testl %edi, %edi
-; CHECK-NOBMI-NEXT: setns %al
-; CHECK-NOBMI-NEXT: shll $9, %eax
+; CHECK-NOBMI-NEXT: notl %edi
+; CHECK-NOBMI-NEXT: shrl $22, %edi
+; CHECK-NOBMI-NEXT: andl $512, %edi # imm = 0x200
+; CHECK-NOBMI-NEXT: movl %edi, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: pos_sel_special_constant:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %eax, %eax
-; CHECK-BMI-NEXT: testl %edi, %edi
-; CHECK-BMI-NEXT: setns %al
-; CHECK-BMI-NEXT: shll $9, %eax
+; CHECK-BMI-NEXT: notl %edi
+; CHECK-BMI-NEXT: shrl $22, %edi
+; CHECK-BMI-NEXT: andl $512, %edi # imm = 0x200
+; CHECK-BMI-NEXT: movl %edi, %eax
; CHECK-BMI-NEXT: retq
%tmp.1 = icmp sgt i32 %a, -1
%retval = select i1 %tmp.1, i32 512, i32 0
diff --git a/llvm/test/CodeGen/X86/signbit-shift.ll b/llvm/test/CodeGen/X86/signbit-shift.ll
index d974187..743664f 100644
--- a/llvm/test/CodeGen/X86/signbit-shift.ll
+++ b/llvm/test/CodeGen/X86/signbit-shift.ll
@@ -6,9 +6,9 @@
define i32 @zext_ifpos(i32 %x) {
; CHECK-LABEL: zext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %al
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
@@ -18,10 +18,10 @@
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %al
-; CHECK-NEXT: addl $41, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: leal 41(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
@@ -32,10 +32,10 @@
define i32 @sel_ifpos_tval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_tval_bigger:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %al
-; CHECK-NEXT: addl $41, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: leal 41(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 42, i32 41
@@ -45,10 +45,9 @@
define i32 @sext_ifpos(i32 %x) {
; CHECK-LABEL: sext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %al
-; CHECK-NEXT: negl %eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
@@ -58,11 +57,10 @@
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %cl
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: subl %edi, %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
@@ -73,11 +71,10 @@
define i32 @sel_ifpos_fval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_fval_bigger:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %cl
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: subl %edi, %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 41, i32 42
diff --git a/llvm/test/CodeGen/XCore/ashr.ll b/llvm/test/CodeGen/XCore/ashr.ll
index 78cb144..f4247dc 100644
--- a/llvm/test/CodeGen/XCore/ashr.ll
+++ b/llvm/test/CodeGen/XCore/ashr.ll
@@ -72,5 +72,6 @@
ret i32 %2
}
; CHECK-LABEL: f5:
-; CHECK-NEXT: ashr r0, r0, 32
-; CHECK-NEXT: eq r0, r0, 0
+; CHECK-NEXT: not r0, r0
+; CHECK-NEXT: mkmsk r1, 5
+; CHECK-NEXT: shr r0, r0, r1