Fix a bug in ReduceLoadWidth that wasn't handling extending
loads properly. We miscompiled the testcase into:
_test: ## @test
movl $128, (%rdi)
movzbl 1(%rdi), %eax
ret
Now we get a proper:
_test: ## @test
movl $128, (%rdi)
movsbl (%rdi), %eax
movzbl %ah, %eax
ret
This fixes PR8757.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122392 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2523bb2..e4f3027 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4241,12 +4241,15 @@
return SDValue();
}
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
// If the load was a sextload then the result is a splat of the sign bit
// of the extended byte. This is not worth optimizing for.
- if (ShAmt >= VT.getSizeInBits())
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
return SDValue();
}
}
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index 53b0388..ef27cbc 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -6,7 +6,7 @@
; DAGCombiner should fold this code in finite time.
; rdar://8606584
-define void @D() nounwind readnone {
+define void @test1() nounwind readnone {
bb.nph:
br label %while.cond
@@ -33,10 +33,10 @@
; DAGCombiner shouldn't fold the sdiv (ashr) away.
; rdar://8636812
-; CHECK: main:
+; CHECK: test2:
; CHECK: sarl
-define i32 @main() nounwind {
+define i32 @test2() nounwind {
entry:
%i = alloca i32, align 4
%j = alloca i8, align 1
@@ -63,3 +63,21 @@
declare void @abort() noreturn
declare void @exit(i32) noreturn
+
+; DAG Combiner can't fold this into a load of the 1'th byte.
+; PR8757
+define i32 @test3(i32 *%P) nounwind ssp {
+ volatile store i32 128, i32* %P
+ %tmp4.pre = load i32* %P
+ %phitmp = trunc i32 %tmp4.pre to i16
+ %phitmp13 = shl i16 %phitmp, 8
+ %phitmp14 = ashr i16 %phitmp13, 8
+ %phitmp15 = lshr i16 %phitmp14, 8
+ %phitmp16 = zext i16 %phitmp15 to i32
+ ret i32 %phitmp16
+
+; CHECK: movl $128, (%rdi)
+; CHECK-NEXT: movsbl (%rdi), %eax
+; CHECK-NEXT: movzbl %ah, %eax
+; CHECK-NEXT: ret
+}