[DAGCombiner] Support non-zero offset in load combine
Enable folding of patterns which load a value from a non-zero offset:
i8 *a = ...
i32 val = a[4] | (a[5] << 8) | (a[6] << 16) | (a[7] << 24)
=>
i32 val = *((i32*)(a+4))
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D29394
llvm-svn: 294582
diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll
index c986670..a3e604f 100644
--- a/llvm/test/CodeGen/X86/load-combine.ll
+++ b/llvm/test/CodeGen/X86/load-combine.ll
@@ -571,37 +571,18 @@
ret i32 %tmp19
}
-; Non-zero offsets are not supported for now
; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl 1(%eax), %ecx
-; CHECK-NEXT: movzbl 2(%eax), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl 3(%eax), %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl 4(%eax), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl 1(%rdi), %eax
-; CHECK64-NEXT: movzbl 2(%rdi), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl 3(%rdi), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 4(%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
@@ -632,30 +613,12 @@
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl -4(%eax), %ecx
-; CHECK-NEXT: movzbl -3(%eax), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl -2(%eax), %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl -1(%eax), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl -4(%rdi), %eax
-; CHECK64-NEXT: movzbl -3(%rdi), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl -2(%rdi), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl -1(%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
@@ -686,30 +649,14 @@
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl 4(%eax), %ecx
-; CHECK-NEXT: movzbl 3(%eax), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl 2(%eax), %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl 1(%eax), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: movl 1(%eax), %eax
+; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl 4(%rdi), %eax
-; CHECK64-NEXT: movzbl 3(%rdi), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl 2(%rdi), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 1(%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: movl 1(%rdi), %eax
+; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
@@ -740,30 +687,14 @@
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl -1(%eax), %ecx
-; CHECK-NEXT: movzbl -2(%eax), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl -3(%eax), %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl -4(%eax), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: movl -4(%eax), %eax
+; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl -1(%rdi), %eax
-; CHECK64-NEXT: movzbl -2(%rdi), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl -3(%rdi), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl -4(%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: movl -4(%rdi), %eax
+; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
@@ -1017,16 +948,7 @@
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
-; CHECK64-NEXT: movzbl 13(%rdi,%rax), %ecx
-; CHECK64-NEXT: movzbl 14(%rdi,%rax), %edx
-; CHECK64-NEXT: shll $8, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 15(%rdi,%rax), %ecx
-; CHECK64-NEXT: shll $16, %ecx
-; CHECK64-NEXT: orl %edx, %ecx
-; CHECK64-NEXT: movzbl 16(%rdi,%rax), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %ecx, %eax
+; CHECK64-NEXT: movl 13(%rdi,%rax), %eax
; CHECK64-NEXT: retq
%tmp = add nuw nsw i32 %i, 4
%tmp2 = add nuw nsw i32 %i, 3