[CodeGen] When promoting CTTZ operations to a larger type, don't insert a select that detects a zero input in order to return the original size instead of the extended size. Instead, just set the lowest bit of the zero-extended part (the bit just above the top of the original type), so that a zero input yields the original bit width.
llvm-svn: 267280
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 688a4c4..a06932b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4010,18 +4010,20 @@
case ISD::CTPOP:
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(),
+ OVT.getSizeInBits());
+ Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1,
+ DAG.getConstant(TopBit, dl, NVT));
+ }
// Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
// already the correct result.
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
- if (Node->getOpcode() == ISD::CTTZ) {
- // FIXME: This should set a bit in the zero extended value instead.
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT),
- Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT),
- ISD::SETEQ);
- Tmp1 = DAG.getSelect(dl, NVT, Tmp2,
- DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1);
- } else if (Node->getOpcode() == ISD::CTLZ ||
- Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index fc1ce0c..4547393 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s
declare i8 @llvm.cttz.i8(i8, i1)
@@ -10,12 +10,9 @@
; CHECK-LABEL: t1:
; CHECK: # BB#0:
; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: tzcntl %eax, %ecx
-; CHECK-NEXT: cmpl $32, %ecx
-; CHECK-NEXT: movl $8, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: orl $256, %eax # imm = 0x100
+; CHECK-NEXT: tzcntl %eax, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
ret i8 %tmp
}
@@ -25,7 +22,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: tzcntw %di, %ax
; CHECK-NEXT: retq
-;
%tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
ret i16 %tmp
}
@@ -35,7 +31,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: tzcntl %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
ret i32 %tmp
}
@@ -45,7 +40,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: tzcntl (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = tail call i32 @llvm.cttz.i32(i32 %x1, i1 false )
ret i32 %tmp
@@ -56,7 +50,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: tzcntq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
ret i64 %tmp
}
@@ -67,7 +60,6 @@
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: tzcntl %eax, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 true )
ret i8 %tmp
}
@@ -77,7 +69,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: tzcntw %di, %ax
; CHECK-NEXT: retq
-;
%tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 true )
ret i16 %tmp
}
@@ -87,7 +78,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: tzcntl %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
ret i32 %tmp
}
@@ -97,7 +87,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: tzcntq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 true )
ret i64 %tmp
}
@@ -107,7 +96,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: andnl %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp1 = xor i32 %x, -1
%tmp2 = and i32 %y, %tmp1
ret i32 %tmp2
@@ -118,7 +106,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: andnl (%rsi), %edi, %eax
; CHECK-NEXT: retq
-;
%y1 = load i32, i32* %y
%tmp1 = xor i32 %x, -1
%tmp2 = and i32 %y1, %tmp1
@@ -130,7 +117,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: andnq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp1 = xor i64 %x, -1
%tmp2 = and i64 %tmp1, %y
ret i64 %tmp2
@@ -143,7 +129,6 @@
; CHECK-NEXT: andnl %esi, %edi, %eax
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%notx = xor i32 %x, -1
%and = and i32 %notx, %y
%cmp = icmp eq i32 %and, 0
@@ -158,7 +143,6 @@
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%and = and i32 %x, %y
%cmp = icmp eq i32 %and, %y
ret i1 %cmp
@@ -171,7 +155,6 @@
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
-;
%and = and i32 %y, %x
%cmp = icmp ne i32 %and, %y
ret i1 %cmp
@@ -184,7 +167,6 @@
; CHECK-NEXT: cmpl %edi, %esi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%and = and i32 %x, %y
%cmp = icmp eq i32 %y, %and
ret i1 %cmp
@@ -197,7 +179,6 @@
; CHECK-NEXT: cmpl %edi, %esi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
-;
%and = and i32 %y, %x
%cmp = icmp ne i32 %y, %and
ret i1 %cmp
@@ -212,7 +193,6 @@
; CHECK-NEXT: cmpl $43, %edi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%and = and i32 %x, 43
%cmp = icmp eq i32 %and, 43
ret i1 %cmp
@@ -225,7 +205,6 @@
; CHECK-NEXT: andnq %rsi, %rdi, %rax
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%notx = xor i64 %x, -1
%and = and i64 %y, %notx
%cmp = icmp eq i64 %and, 0
@@ -240,7 +219,6 @@
; CHECK-NEXT: testb %sil, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%noty = xor i8 %y, -1
%and = and i8 %x, %noty
%cmp = icmp eq i8 %and, 0
@@ -252,7 +230,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: bextrl %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -262,7 +239,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: bextrl %esi, (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -276,7 +252,6 @@
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
; CHECK-NEXT: bextrl %eax, %edi, %eax
; CHECK-NEXT: retq
-;
%1 = lshr i32 %x, 4
%2 = and i32 %1, 4095
ret i32 %2
@@ -288,7 +263,6 @@
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
; CHECK-NEXT: bextrl %eax, (%rdi), %eax
; CHECK-NEXT: retq
-;
%1 = load i32, i32* %x
%2 = lshr i32 %1, 4
%3 = and i32 %2, 4095
@@ -300,7 +274,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: bextrq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -313,7 +286,6 @@
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
; CHECK-NEXT: bextrl %eax, %edi, %eax
; CHECK-NEXT: retq
-;
%1 = lshr i64 %x, 4
%2 = and i64 %1, 4095
ret i64 %2
@@ -325,7 +297,6 @@
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
; CHECK-NEXT: bextrl %eax, (%rdi), %eax
; CHECK-NEXT: retq
-;
%1 = load i64, i64* %x, align 8
%2 = lshr i64 %1, 4
%3 = and i64 %2, 4095
@@ -339,7 +310,6 @@
; CHECK-NEXT: andl $111, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
-;
entry:
%shr = lshr i32 %x, 2
%and = and i32 %shr, 111
@@ -353,7 +323,6 @@
; CHECK-NEXT: movabsq $8589934590, %rax # imm = 0x1FFFFFFFE
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
-;
entry:
%shr = lshr i64 %x, 2
%and = and i64 %shr, 8589934590
@@ -365,7 +334,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: bzhil %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -375,7 +343,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: bzhil %esi, (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -388,7 +355,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -400,7 +366,6 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: bzhil %esi, %edi, %eax
; CHECK-NEXT: retq
-;
entry:
%conv = zext i8 %index to i32
%shl = shl i32 1, %conv
@@ -414,7 +379,6 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: bzhil %esi, (%rdi), %eax
; CHECK-NEXT: retq
-;
entry:
%x = load i32, i32* %w
%conv = zext i8 %index to i32
@@ -429,7 +393,6 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: bzhil %esi, %edi, %eax
; CHECK-NEXT: retq
-;
entry:
%conv = zext i8 %index to i32
%shl = shl i32 1, %conv
@@ -443,7 +406,6 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
entry:
%conv = zext i8 %index to i64
%shl = shl i64 1, %conv
@@ -458,7 +420,6 @@
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: bzhiq %rax, %rdi, %rax
; CHECK-NEXT: retq
-;
entry:
%and = and i64 %x, 4611686018427387903
ret i64 %and
@@ -470,7 +431,6 @@
; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
-;
entry:
%and = and i64 %x, 2147483647
ret i64 %and
@@ -481,7 +441,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: blsil %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = sub i32 0, %x
%tmp2 = and i32 %x, %tmp
ret i32 %tmp2
@@ -492,7 +451,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: blsil (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = sub i32 0, %x1
%tmp2 = and i32 %x1, %tmp
@@ -504,7 +462,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: blsiq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = sub i64 0, %x
%tmp2 = and i64 %tmp, %x
ret i64 %tmp2
@@ -515,7 +472,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: blsmskl %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = sub i32 %x, 1
%tmp2 = xor i32 %x, %tmp
ret i32 %tmp2
@@ -526,7 +482,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: blsmskl (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = sub i32 %x1, 1
%tmp2 = xor i32 %x1, %tmp
@@ -538,7 +493,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: blsmskq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = sub i64 %x, 1
%tmp2 = xor i64 %tmp, %x
ret i64 %tmp2
@@ -549,7 +503,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: blsrl %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = sub i32 %x, 1
%tmp2 = and i32 %x, %tmp
ret i32 %tmp2
@@ -560,7 +513,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: blsrl (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = sub i32 %x1, 1
%tmp2 = and i32 %x1, %tmp
@@ -572,7 +524,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: blsrq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = sub i64 %x, 1
%tmp2 = and i64 %tmp, %x
ret i64 %tmp2
@@ -583,7 +534,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: pdepl %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -593,7 +543,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: pdepl (%rsi), %edi, %eax
; CHECK-NEXT: retq
-;
%y1 = load i32, i32* %y
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -606,7 +555,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: pdepq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -618,7 +566,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: pextl %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -628,7 +575,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: pextl (%rsi), %edi, %eax
; CHECK-NEXT: retq
-;
%y1 = load i32, i32* %y
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -641,7 +587,6 @@
; CHECK: # BB#0:
; CHECK-NEXT: pextq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y)
ret i64 %tmp
}