[DAGCombine] Fix for a load combine bug with non-zero offset patterns on BE targets

This pattern is essentially an i16 load from the address p+1:

  %p1.i16 = bitcast i8* %p to i16*
  %p2.i8 = getelementptr i8, i8* %p, i64 2
  %v1 = load i16, i16* %p1.i16
  %v2.i8 = load i8, i8* %p2.i8
  %v2 = zext i8 %v2.i8 to i16
  %v1.shl = shl i16 %v1, 8
  %res = or i16 %v1.shl, %v2

The current implementation identifies the %v1 load as the one providing the first byte and mistakenly emits an i16 load from the %p1.i16 address. This patch adds a check that the first byte is loaded from a zero offset into the first load, so that the first load's address can be used as the base address of the combined load. Otherwise we simply give up on combining.
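
For reference, a byte-level view of the pattern on a big-endian target (a C sketch of the equivalent computation, assuming 8-bit bytes; the function name is illustrative only):

  #include <stdint.h>

  /* Equivalent of the IR above on a big-endian target. */
  uint16_t load_pattern(const uint8_t *p) {
    uint16_t v1 = (uint16_t)((p[0] << 8) | p[1]); /* i16 BE load at p         */
    uint16_t hi = (uint16_t)(v1 << 8);            /* only p[1] << 8 survives  */
    return (uint16_t)(hi | p[2]);                 /* == i16 BE load at p + 1  */
  }

So a single-load fold would need p+1 as the base address, which is not available in the original DAG.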

llvm-svn: 296336
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 26cb3a69..af3d170 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4593,6 +4593,10 @@
   assert((BigEndian != LittleEndian) && "should be either or");
   assert(FirstByteProvider && "must be set");
 
+  // Ensure that the first byte is loaded from a zero offset of the first load,
+  // so the combined value can be loaded from the address of the first load.
+  if (MemoryByteOffset(*FirstByteProvider) != 0)
+    return SDValue();
   LoadSDNode *FirstLoad = FirstByteProvider->Load;
 
   // The node we are looking at matches with the pattern, check if we can
diff --git a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
index 8e533b1..e60e86a 100644
--- a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
@@ -563,3 +563,26 @@
   %tmp8 = or i32 %tmp7, %tmp30
   ret i32 %tmp8
 }
+
+; i8* p;
+; i16* p1.i16 = (i16*) p;
+; (p1.i16[0] << 8) | ((i16) p[2])
+;
+; This is essentially an i16 load from p[1], but we don't fold the pattern now
+; because the p[1] address is not available in the original DAG.
+define i16 @load_i16_from_nonzero_offset(i8* %p) {
+; CHECK-LABEL: load_i16_from_nonzero_offset:
+; CHECK:  ldrh    w8, [x0]
+; CHECK-NEXT: ldrb  w0, [x0, #2]
+; CHECK-NEXT: bfi w0, w8, #8, #24
+; CHECK-NEXT: ret
+
+  %p1.i16 = bitcast i8* %p to i16*
+  %p2.i8 = getelementptr i8, i8* %p, i64 2
+  %v1 = load i16, i16* %p1.i16
+  %v2.i8 = load i8, i8* %p2.i8
+  %v2 = zext i8 %v2.i8 to i16
+  %v1.shl = shl i16 %v1, 8
+  %res = or i16 %v1.shl, %v2
+  ret i16 %res
+}
diff --git a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
index 047c732..4068be9 100644
--- a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
@@ -753,3 +753,32 @@
   %tmp8 = or i32 %tmp7, %tmp30
   ret i32 %tmp8
 }
+
+; i8* p;
+; i16* p1.i16 = (i16*) p;
+; (p1.i16[0] << 8) | ((i16) p[2])
+;
+; This is essentially an i16 load from p[1], but we don't fold the pattern now
+; because the p[1] address is not available in the original DAG.
+define i16 @load_i16_from_nonzero_offset(i8* %p) {
+; CHECK-LABEL: load_i16_from_nonzero_offset:
+; CHECK: ldrh  r1, [r0]
+; CHECK-NEXT: ldrb  r0, [r0, #2]
+; CHECK-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset:
+; CHECK-ARMv6: ldrh  r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb  r0, [r0, #2]
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-ARMv6-NEXT: bx  lr
+
+  %p1.i16 = bitcast i8* %p to i16*
+  %p2.i8 = getelementptr i8, i8* %p, i64 2
+  %v1 = load i16, i16* %p1.i16
+  %v2.i8 = load i8, i8* %p2.i8
+  %v2 = zext i8 %v2.i8 to i16
+  %v1.shl = shl i16 %v1, 8
+  %res = or i16 %v1.shl, %v2
+  ret i16 %res
+}