Loosen up IV reuse to allow reuse of the same stride with a larger type when truncating from the larger type to the smaller type is free.
e.g. it turns this loop:
LBB1_1: # entry.bb_crit_edge
xorl %ecx, %ecx
xorw %dx, %dx
movw %dx, %si
LBB1_2: # bb
movl L_X$non_lazy_ptr, %edi
movw %si, (%edi)
movl L_Y$non_lazy_ptr, %edi
movw %dx, (%edi)
addw $4, %dx
incw %si
incl %ecx
cmpl %eax, %ecx
jne LBB1_2 # bb
into:
LBB1_1: # entry.bb_crit_edge
xorl %ecx, %ecx
xorw %dx, %dx
LBB1_2: # bb
movl L_X$non_lazy_ptr, %esi
movw %cx, (%esi)
movl L_Y$non_lazy_ptr, %esi
movw %dx, (%esi)
addw $4, %dx
incl %ecx
cmpl %eax, %ecx
jne LBB1_2 # bb
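
For reference, a minimal reproducer for the loops above, in the spirit of the
README entry updated below. Only the loop body comes from the README; declaring
X and Y as global shorts is an assumption, inferred from the 16-bit movw stores:

  short X, Y;

  void foo(int N) {
    int i;
    for (i = 0; i < N; i++) { X = i; Y = i*4; }
  }
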
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43375 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index ccd15be..41b38d8 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -339,20 +339,18 @@
for (i = 0; i < N; i++) { X = i; Y = i*4; }
}
-LBB1_1: #bb.preheader
- xorl %ecx, %ecx
- xorw %dx, %dx
-LBB1_2: #bb
- movl L_X$non_lazy_ptr, %esi
- movw %dx, (%esi)
- movw %dx, %si
- shlw $2, %si
- movl L_Y$non_lazy_ptr, %edi
- movw %si, (%edi)
- incl %ecx
- incw %dx
- cmpl %eax, %ecx
- jne LBB1_2 #bb
+LBB1_1: # entry.bb_crit_edge
+ xorl %ecx, %ecx
+ xorw %dx, %dx
+LBB1_2: # bb
+ movl L_X$non_lazy_ptr, %esi
+ movw %cx, (%esi)
+ movl L_Y$non_lazy_ptr, %esi
+ movw %dx, (%esi)
+ addw $4, %dx
+ incl %ecx
+ cmpl %eax, %ecx
+ jne LBB1_2 # bb
vs.
@@ -367,11 +365,7 @@
cmpl %edx, %edi
jne L4
-There are 3 issues:
-
-1. Lack of post regalloc LICM.
-2. LSR unable to reused IV for a different type (i16 vs. i32) even though
- the cast would be free.
+This is due to the lack of post regalloc LICM.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4d528ab..172aa53 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5122,6 +5122,13 @@
}
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ if (!Ty1->isInteger() || !Ty2->isInteger())
+ return false;
+ return Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits();
+}
+
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7123ada..b68de5a 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -359,6 +359,11 @@
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
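
Not part of the patch: a hedged sketch of how a caller such as
LoopStrengthReduce might consult the new hook when weighing IV reuse. The
helper name and the way TLI is obtained are illustrative only;
Type::Int32Ty/Type::Int16Ty are the integer type singletons of this era of
the LLVM API.

  #include "llvm/Type.h"
  #include "llvm/Target/TargetLowering.h"
  using namespace llvm;

  // Sketch only (not the actual LSR change): an i32 IV can stand in for an
  // i16 use when the target says the truncation costs nothing, e.g. reading
  // AX out of EAX on x86.
  static bool canReuseWideIV(const TargetLowering *TLI) {
    const Type *WideTy   = Type::Int32Ty;  // type of the existing IV
    const Type *NarrowTy = Type::Int16Ty;  // type of the candidate use
    return TLI && TLI->isTruncateFree(WideTy, NarrowTy);
  }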