Loosen up IV reuse to allow reuse of the same stride but a larger type when truncating from the larger type to the smaller type is free.
For example, it turns this loop:
LBB1_1: # entry.bb_crit_edge
        xorl    %ecx, %ecx
        xorw    %dx, %dx
        movw    %dx, %si
LBB1_2: # bb
        movl    L_X$non_lazy_ptr, %edi
        movw    %si, (%edi)
        movl    L_Y$non_lazy_ptr, %edi
        movw    %dx, (%edi)
        addw    $4, %dx
        incw    %si
        incl    %ecx
        cmpl    %eax, %ecx
        jne     LBB1_2  # bb
	
into

LBB1_1: # entry.bb_crit_edge
        xorl    %ecx, %ecx
        xorw    %dx, %dx
LBB1_2: # bb
        movl    L_X$non_lazy_ptr, %esi
        movw    %cx, (%esi)
        movl    L_Y$non_lazy_ptr, %esi
        movw    %dx, (%esi)
        addw    $4, %dx
        incl    %ecx
        cmpl    %eax, %ecx
        jne     LBB1_2  # bb

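The LoopStrengthReduce side of the change is not shown in the hunks below; what it amounts to is asking the target whether the narrowing cast would be free before giving the narrower use its own IV. A rough sketch of how the new hook is meant to be consulted -- the helper name and surrounding code are illustrative, only isTruncateFree() is real:

// Illustrative sketch, not the actual LSR code: decide whether a use of
// type UseTy can share an already-existing IV of the wider type IVTy that
// has the same stride.
static bool canReuseWiderIV(const Type *IVTy, const Type *UseTy,
                            const TargetLowering *TLI) {
  if (IVTy == UseTy)
    return true;                  // identical types: trivially shareable
  if (!TLI)
    return false;                 // no target info: stay conservative
  // e.g. with the X86 implementation below, isTruncateFree(i32, i16)
  // returns true, so the i16 IV in %si above is eliminated and the X
  // store simply reads %cx, the low half of the i32 counter in %ecx.
  return TLI->isTruncateFree(IVTy, UseTy);
}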

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43375 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index ccd15be..41b38d8 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -339,20 +339,18 @@
   for (i = 0; i < N; i++) { X = i; Y = i*4; }
 }
 
-LBB1_1:	#bb.preheader
-	xorl %ecx, %ecx
-	xorw %dx, %dx
-LBB1_2:	#bb
-	movl L_X$non_lazy_ptr, %esi
-	movw %dx, (%esi)
-	movw %dx, %si
-	shlw $2, %si
-	movl L_Y$non_lazy_ptr, %edi
-	movw %si, (%edi)
-	incl %ecx
-	incw %dx
-	cmpl %eax, %ecx
-	jne LBB1_2	#bb
+LBB1_1:	# entry.bb_crit_edge
+	xorl	%ecx, %ecx
+	xorw	%dx, %dx
+LBB1_2:	# bb
+	movl	L_X$non_lazy_ptr, %esi
+	movw	%cx, (%esi)
+	movl	L_Y$non_lazy_ptr, %esi
+	movw	%dx, (%esi)
+	addw	$4, %dx
+	incl	%ecx
+	cmpl	%eax, %ecx
+	jne	LBB1_2	# bb
 
 vs.
 
@@ -367,11 +365,7 @@
 	cmpl	%edx, %edi
 	jne	L4
 
-There are 3 issues:
-
-1. Lack of post regalloc LICM.
-2. LSR unable to reused IV for a different type (i16 vs. i32) even though
-   the cast would be free.
+This is due to the lack of post regalloc LICM.
 
 //===---------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4d528ab..172aa53 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5122,6 +5122,13 @@
 }
 
 
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+  if (!Ty1->isInteger() || !Ty2->isInteger())
+    return false;
+  return Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits();
+}
+
+
 /// isShuffleMaskLegal - Targets can use this to indicate that they only
 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7123ada..b68de5a 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -359,6 +359,11 @@
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
 
+    /// isTruncateFree - Return true if it's free to truncate a value of
+    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+    /// register EAX to i16 by referencing its sub-register AX.
+    virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+  
     /// isShuffleMaskLegal - Targets can use this to indicate that they only
     /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
     /// By default, if a target supports the VECTOR_SHUFFLE node, all mask