Generalize ExtendUsesToFormExtLoad to be usable for ANY_EXTEND,
in addition to ZERO_EXTEND and SIGN_EXTEND. Fix a bug in the
way it checked for live-out values, and simplify the way it
finds users by using SDNode::use_iterator's (relatively) new
features. Also, make it slightly more permissive on targets
with free truncates.
In SelectionDAGBuild, avoid creating ANY_EXTEND nodes that are
larger than necessary. If the target's ShiftAmountTy has
enough bits, use it. This exposes the truncate to optimization
early, enabling more optimizations.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68670 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/2008-09-10-SpillerBug2.ll b/test/CodeGen/X86/2008-09-10-SpillerBug2.ll
deleted file mode 100644
index a1b4ccc..0000000
--- a/test/CodeGen/X86/2008-09-10-SpillerBug2.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movw | not grep %e.x
-; PR2681
-
-@g_491 = external global i32 ; <i32*> [#uses=1]
-@g_897 = external global i16 ; <i16*> [#uses=1]
-
-define i32 @func_7(i16 signext %p_9) nounwind {
-entry:
- %p_9.addr = alloca i16 ; <i16*> [#uses=2]
- %l_1122 = alloca i16, align 2 ; <i16*> [#uses=1]
- %l_1128 = alloca i32, align 4 ; <i32*> [#uses=1]
- %l_1129 = alloca i32, align 4 ; <i32*> [#uses=1]
- %l_1130 = alloca i32, align 4 ; <i32*> [#uses=1]
- %tmp14 = load i16* %l_1122 ; <i16> [#uses=1]
- %conv15 = sext i16 %tmp14 to i32 ; <i32> [#uses=1]
- %tmp16 = load i16* %p_9.addr ; <i16> [#uses=1]
- %conv17 = sext i16 %tmp16 to i32 ; <i32> [#uses=1]
- %xor = xor i32 %conv15, %conv17 ; <i32> [#uses=1]
- %tmp18 = load i32* null ; <i32> [#uses=1]
- %or = or i32 %xor, %tmp18 ; <i32> [#uses=1]
- %conv19 = trunc i32 %or to i16 ; <i16> [#uses=1]
- %tmp28 = load i16* %p_9.addr ; <i16> [#uses=1]
- %tmp33 = load i16* @g_897 ; <i16> [#uses=1]
- %tmp34 = load i32* @g_491 ; <i32> [#uses=1]
- %conv35 = trunc i32 %tmp34 to i16 ; <i16> [#uses=1]
- %tmp36 = load i16* null ; <i16> [#uses=1]
- %conv37 = trunc i16 %tmp36 to i8 ; <i8> [#uses=1]
- %tmp38 = load i32* %l_1128 ; <i32> [#uses=1]
- %conv39 = sext i32 %tmp38 to i64 ; <i64> [#uses=1]
- %tmp42 = load i32* %l_1129 ; <i32> [#uses=1]
- %conv43 = trunc i32 %tmp42 to i16 ; <i16> [#uses=1]
- %tmp44 = load i32* %l_1130 ; <i32> [#uses=1]
- %conv45 = sext i32 %tmp44 to i64 ; <i64> [#uses=1]
- %call46 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext %tmp33, i16 zeroext %conv35, i8 zeroext %conv37, i64 %conv39, i32 0, i16 zeroext %conv43, i64 %conv45, i8 zeroext 1 ) ; <i32> [#uses=0]
- %call48 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %tmp28, i64 0, i8 zeroext 1 ) ; <i32> [#uses=0]
- %call50 = call i32 @func_18( i16 zeroext 1, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %conv19, i64 0, i8 zeroext 1 ) ; <i32> [#uses=0]
- ret i32 undef
-}
-
-declare i32 @func_18(i16 zeroext, i16 zeroext, i16 zeroext, i16 zeroext, i8 zeroext, i64, i32, i16 zeroext, i64, i8 zeroext)
diff --git a/test/CodeGen/X86/anyext-uses.ll b/test/CodeGen/X86/anyext-uses.ll
new file mode 100644
index 0000000..e8c3cf0
--- /dev/null
+++ b/test/CodeGen/X86/anyext-uses.ll
@@ -0,0 +1,47 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep mov %t | count 8
+; RUN: not grep implicit %t
+
+; Avoid partial register updates; don't define an i8 register and read
+; the i32 super-register.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.6"
+ %struct.RC4_KEY = type { i8, i8, [256 x i8] }
+
+define void @foo(%struct.RC4_KEY* nocapture %key, i64 %len, i8* %indata, i8* %outdata) nounwind { ; test body: i8 loads consumed as i8 and/or through zext; only %key is referenced, %len/%indata/%outdata are unused
+entry:
+ br label %bb24
+
+bb24: ; preds = %bb24, %entry -- single block that branches back to itself; there is no exit edge
+ %0 = load i8* null, align 1 ; <i8> [#uses=1]
+ %1 = zext i8 %0 to i64 ; <i64> [#uses=1]
+ %2 = shl i64 %1, 32 ; <i64> [#uses=1] -- zero-extended byte moved up to bit 32
+ %3 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1] -- element 0 of field 2, the [256 x i8] member
+ %4 = load i8* %3, align 1 ; <i8> [#uses=2] -- used as i8 by %5 and widened to i32 by %8
+ %5 = add i8 %4, 0 ; <i8> [#uses=2]
+ %6 = zext i8 %5 to i64 ; <i64> [#uses=0] -- result is never used
+ %7 = load i8* null, align 1 ; <i8> [#uses=1]
+ %8 = zext i8 %4 to i32 ; <i32> [#uses=1]
+ %9 = zext i8 %7 to i32 ; <i32> [#uses=1]
+ %10 = add i32 %9, %8 ; <i32> [#uses=1]
+ %11 = and i32 %10, 255 ; <i32> [#uses=1] -- keep only the low 8 bits of the sum
+ %12 = zext i32 %11 to i64 ; <i64> [#uses=1]
+ %13 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %12 ; <i8*> [#uses=1] -- array element selected by the masked sum
+ %14 = load i8* %13, align 1 ; <i8> [#uses=1]
+ %15 = zext i8 %14 to i64 ; <i64> [#uses=1]
+ %16 = shl i64 %15, 48 ; <i64> [#uses=1] -- zero-extended byte moved up to bit 48
+ %17 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1] -- same address computation as %3
+ %18 = load i8* %17, align 1 ; <i8> [#uses=2] -- used by the i8 add (%19) and as the stored value below
+ %19 = add i8 %18, %5 ; <i8> [#uses=1]
+ %20 = zext i8 %19 to i64 ; <i64> [#uses=1]
+ %21 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %20 ; <i8*> [#uses=1] -- array element selected by the i8 sum
+ store i8 %18, i8* %21, align 1
+ %22 = or i64 0, %2 ; <i64> [#uses=1] -- or with constant 0: same value as %2
+ %23 = or i64 %22, 0 ; <i64> [#uses=1] -- or with constant 0 again
+ %24 = or i64 %23, %16 ; <i64> [#uses=1] -- combines the two shifted bytes
+ %25 = or i64 %24, 0 ; <i64> [#uses=1] -- or with constant 0
+ %26 = xor i64 %25, 0 ; <i64> [#uses=1] -- xor with constant 0: value unchanged
+ store i64 %26, i64* null, align 8
+ br label %bb24
+}
diff --git a/test/CodeGen/X86/switch-zextload.ll b/test/CodeGen/X86/switch-zextload.ll
new file mode 100644
index 0000000..f3c701f
--- /dev/null
+++ b/test/CodeGen/X86/switch-zextload.ll
@@ -0,0 +1,34 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+
+; Do zextload, instead of a load and a separate zext.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+ %struct.move_s = type { i32, i32, i32, i32, i32, i32 }
+ %struct.node_t = type { i8, i8, i8, i8, i32, i32, %struct.node_t**, %struct.node_t*, %struct.move_s }
+
+define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind { ; test body: one i8 load feeding a switch; %node is never referenced
+entry:
+ %0 = load i8* null, align 1 ; <i8> [#uses=1] -- sole use is the switch condition below
+ switch i8 %0, label %return [ ; four explicit cases (0-3); any other value goes to %return
+ i8 2, label %bb31
+ i8 0, label %bb80
+ i8 1, label %bb82
+ i8 3, label %bb84
+ ]
+
+bb31: ; preds = %entry -- case 2 is marked unreachable
+ unreachable
+
+bb80: ; preds = %entry -- case 0
+ ret void
+
+bb82: ; preds = %entry -- case 1
+ ret void
+
+bb84: ; preds = %entry -- case 3
+ ret void
+
+return: ; preds = %entry -- default destination of the switch
+ ret void
+}