Generalize ExtendUsesToFormExtLoad to be usable for ANY_EXTEND,
in addition to ZERO_EXTEND and SIGN_EXTEND. Fix a bug in the
way it checked for live-out values, and simplify the way it
finds users by using SDNode::use_iterator's (relatively) new
features. Also, make it slightly more permissive on targets
with free truncates.

In SelectionDAGBuild, avoid creating ANY_EXTEND nodes that are
larger than necessary. If the target's ShiftAmountTy has
enough bits, use it. This exposes the truncate to optimization
early, enabling more optimizations.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68670 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/2008-09-10-SpillerBug2.ll b/test/CodeGen/X86/2008-09-10-SpillerBug2.ll
deleted file mode 100644
index a1b4ccc..0000000
--- a/test/CodeGen/X86/2008-09-10-SpillerBug2.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movw | not grep %e.x
-; PR2681
-
-@g_491 = external global i32		; <i32*> [#uses=1]
-@g_897 = external global i16		; <i16*> [#uses=1]
-
-define i32 @func_7(i16 signext %p_9) nounwind {
-entry:
-	%p_9.addr = alloca i16		; <i16*> [#uses=2]
-	%l_1122 = alloca i16, align 2		; <i16*> [#uses=1]
-	%l_1128 = alloca i32, align 4		; <i32*> [#uses=1]
-	%l_1129 = alloca i32, align 4		; <i32*> [#uses=1]
-	%l_1130 = alloca i32, align 4		; <i32*> [#uses=1]
-	%tmp14 = load i16* %l_1122		; <i16> [#uses=1]
-	%conv15 = sext i16 %tmp14 to i32		; <i32> [#uses=1]
-	%tmp16 = load i16* %p_9.addr		; <i16> [#uses=1]
-	%conv17 = sext i16 %tmp16 to i32		; <i32> [#uses=1]
-	%xor = xor i32 %conv15, %conv17		; <i32> [#uses=1]
-	%tmp18 = load i32* null		; <i32> [#uses=1]
-	%or = or i32 %xor, %tmp18		; <i32> [#uses=1]
-	%conv19 = trunc i32 %or to i16		; <i16> [#uses=1]
-	%tmp28 = load i16* %p_9.addr		; <i16> [#uses=1]
-	%tmp33 = load i16* @g_897		; <i16> [#uses=1]
-	%tmp34 = load i32* @g_491		; <i32> [#uses=1]
-	%conv35 = trunc i32 %tmp34 to i16		; <i16> [#uses=1]
-	%tmp36 = load i16* null		; <i16> [#uses=1]
-	%conv37 = trunc i16 %tmp36 to i8		; <i8> [#uses=1]
-	%tmp38 = load i32* %l_1128		; <i32> [#uses=1]
-	%conv39 = sext i32 %tmp38 to i64		; <i64> [#uses=1]
-	%tmp42 = load i32* %l_1129		; <i32> [#uses=1]
-	%conv43 = trunc i32 %tmp42 to i16		; <i16> [#uses=1]
-	%tmp44 = load i32* %l_1130		; <i32> [#uses=1]
-	%conv45 = sext i32 %tmp44 to i64		; <i64> [#uses=1]
-	%call46 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext %tmp33, i16 zeroext %conv35, i8 zeroext %conv37, i64 %conv39, i32 0, i16 zeroext %conv43, i64 %conv45, i8 zeroext 1 )		; <i32> [#uses=0]
-	%call48 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %tmp28, i64 0, i8 zeroext 1 )		; <i32> [#uses=0]
-	%call50 = call i32 @func_18( i16 zeroext 1, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %conv19, i64 0, i8 zeroext 1 )		; <i32> [#uses=0]
-	ret i32 undef
-}
-
-declare i32 @func_18(i16 zeroext, i16 zeroext, i16 zeroext, i16 zeroext, i8 zeroext, i64, i32, i16 zeroext, i64, i8 zeroext)
diff --git a/test/CodeGen/X86/anyext-uses.ll b/test/CodeGen/X86/anyext-uses.ll
new file mode 100644
index 0000000..e8c3cf0
--- /dev/null
+++ b/test/CodeGen/X86/anyext-uses.ll
@@ -0,0 +1,47 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep mov %t | count 8
+; RUN: not grep implicit %t
+
+; Avoid partial register updates; don't define an i8 register and read
+; the i32 super-register.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.6"
+	%struct.RC4_KEY = type { i8, i8, [256 x i8] }
+
+define void @foo(%struct.RC4_KEY* nocapture %key, i64 %len, i8* %indata, i8* %outdata) nounwind {
+entry:
+	br label %bb24
+
+bb24:		; preds = %bb24, %entry
+	%0 = load i8* null, align 1		; <i8> [#uses=1]
+	%1 = zext i8 %0 to i64		; <i64> [#uses=1]
+	%2 = shl i64 %1, 32		; <i64> [#uses=1]
+	%3 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0		; <i8*> [#uses=1]
+	%4 = load i8* %3, align 1		; <i8> [#uses=2]
+	%5 = add i8 %4, 0		; <i8> [#uses=2]
+	%6 = zext i8 %5 to i64		; <i64> [#uses=0]
+	%7 = load i8* null, align 1		; <i8> [#uses=1]
+	%8 = zext i8 %4 to i32		; <i32> [#uses=1]
+	%9 = zext i8 %7 to i32		; <i32> [#uses=1]
+	%10 = add i32 %9, %8		; <i32> [#uses=1]
+	%11 = and i32 %10, 255		; <i32> [#uses=1]
+	%12 = zext i32 %11 to i64		; <i64> [#uses=1]
+	%13 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %12		; <i8*> [#uses=1]
+	%14 = load i8* %13, align 1		; <i8> [#uses=1]
+	%15 = zext i8 %14 to i64		; <i64> [#uses=1]
+	%16 = shl i64 %15, 48		; <i64> [#uses=1]
+	%17 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0		; <i8*> [#uses=1]
+	%18 = load i8* %17, align 1		; <i8> [#uses=2]
+	%19 = add i8 %18, %5		; <i8> [#uses=1]
+	%20 = zext i8 %19 to i64		; <i64> [#uses=1]
+	%21 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %20		; <i8*> [#uses=1]
+	store i8 %18, i8* %21, align 1
+	%22 = or i64 0, %2		; <i64> [#uses=1]
+	%23 = or i64 %22, 0		; <i64> [#uses=1]
+	%24 = or i64 %23, %16		; <i64> [#uses=1]
+	%25 = or i64 %24, 0		; <i64> [#uses=1]
+	%26 = xor i64 %25, 0		; <i64> [#uses=1]
+	store i64 %26, i64* null, align 8
+	br label %bb24
+}
diff --git a/test/CodeGen/X86/switch-zextload.ll b/test/CodeGen/X86/switch-zextload.ll
new file mode 100644
index 0000000..f3c701f
--- /dev/null
+++ b/test/CodeGen/X86/switch-zextload.ll
@@ -0,0 +1,34 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+
+; Do zextload, instead of a load and a separate zext.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+	%struct.move_s = type { i32, i32, i32, i32, i32, i32 }
+	%struct.node_t = type { i8, i8, i8, i8, i32, i32, %struct.node_t**, %struct.node_t*, %struct.move_s }
+
+define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind {
+entry:
+	%0 = load i8* null, align 1		; <i8> [#uses=1]
+	switch i8 %0, label %return [
+		i8 2, label %bb31
+		i8 0, label %bb80
+		i8 1, label %bb82
+		i8 3, label %bb84
+	]
+
+bb31:		; preds = %entry
+	unreachable
+
+bb80:		; preds = %entry
+	ret void
+
+bb82:		; preds = %entry
+	ret void
+
+bb84:		; preds = %entry
+	ret void
+
+return:		; preds = %entry
+	ret void
+}