[x86] X86ISelLowering zext(add_nuw(x, C)) --> add(zext(x), C_zext)
Currently X86ISelLowering has a similar transformation for sexts:
sext(add_nsw(x, C)) --> add(sext(x), C_sext)
In this change I extend this code to handle zexts as well.
Reviewed By: spatel
Differential Revision: https://reviews.llvm.org/D23359
llvm-svn: 278520
diff --git a/llvm/test/CodeGen/X86/add-nsw-sext.ll b/llvm/test/CodeGen/X86/add-ext.ll
similarity index 81%
rename from llvm/test/CodeGen/X86/add-nsw-sext.ll
rename to llvm/test/CodeGen/X86/add-ext.ll
index 658c58b..7a157ec 100644
--- a/llvm/test/CodeGen/X86/add-nsw-sext.ll
+++ b/llvm/test/CodeGen/X86/add-ext.ll
@@ -1,9 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
-; The fundamental problem: an add separated from other arithmetic by a sext can't
-; be combined with the later instructions. However, if the first add is 'nsw',
-; then we can promote the sext ahead of that add to allow optimizations.
+; The fundamental problem: an add separated from other arithmetic by a sign or
+; zero extension can't be combined with the later instructions. However, if the
+; first add is 'nsw' or 'nuw' respectively, then we can promote the extension
+; ahead of that add to allow optimizations.
define i64 @add_nsw_consts(i32 %i) {
; CHECK-LABEL: add_nsw_consts:
@@ -166,3 +167,28 @@
ret void
}
+; The same as @PR20134 but sign extension is replaced with zero extension
+define void @PR20134_zext(i32* %a, i32 %i) {
+; CHECK: # BB#0:
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx
+; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx
+; CHECK-NEXT: movl %ecx, (%rdi,%rax,4)
+; CHECK-NEXT: retq
+
+ %add1 = add nuw i32 %i, 1
+ %idx1 = zext i32 %add1 to i64
+ %gep1 = getelementptr i32, i32* %a, i64 %idx1
+ %load1 = load i32, i32* %gep1, align 4
+
+ %add2 = add nuw i32 %i, 2
+ %idx2 = zext i32 %add2 to i64
+ %gep2 = getelementptr i32, i32* %a, i64 %idx2
+ %load2 = load i32, i32* %gep2, align 4
+
+ %add3 = add i32 %load1, %load2
+ %idx3 = zext i32 %i to i64
+ %gep3 = getelementptr i32, i32* %a, i64 %idx3
+ store i32 %add3, i32* %gep3, align 4
+ ret void
+}