CodeGenPrep: sink extends of illegal types into use block.
Summary:
This helps the instruction selector lower an i64 * i64 -> i128
multiplication into a single instruction on targets that support it;
a rough before/after sketch of the transformation is given below.
This is an update of D2973, which was reverted because of a bug reported
as PR19084.
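
For illustration only (this sketch is not part of the patch; %x, %y and the
block names are placeholders), the effect on the IR is roughly:

  ; before: the sext to the illegal type i128 lives in a different block
  ; than its use, so per-block instruction selection cannot combine the
  ; extend with the multiply
  entry:
    %s = sext i64 %x to i128
    br label %use

  use:
    %m = mul i128 %s, %y

  ; after CodeGenPrepare: the sext is sunk into the use block, so the
  ; selector sees both instructions and can emit a single widening
  ; multiply (imulq on x86-64)
  use:
    %s = sext i64 %x to i128
    %m = mul i128 %s, %y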
Reviewers: t.p.northover, chapuni
Reviewed By: t.p.northover
CC: llvm-commits, alex, chapuni
Differential Revision: http://llvm-reviews.chandlerc.com/D3021
llvm-svn: 203797
diff --git a/llvm/test/CodeGen/X86/mul128_sext_loop.ll b/llvm/test/CodeGen/X86/mul128_sext_loop.ll
new file mode 100644
index 0000000..a516f03
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mul128_sext_loop.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+define void @test(i64* nocapture %arr, i64 %arrsize, i64 %factor) nounwind uwtable {
+ %1 = icmp sgt i64 %arrsize, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %2 = sext i64 %factor to i128
+ br label %3
+
+; <label>:3 ; preds = %3, %.lr.ph
+; CHECK-NOT: mul
+; CHECK: imulq
+; CHECK-NOT: mul
+ %carry.02 = phi i128 [ 0, %.lr.ph ], [ %10, %3 ]
+ %i.01 = phi i64 [ 0, %.lr.ph ], [ %11, %3 ]
+ %4 = getelementptr inbounds i64* %arr, i64 %i.01
+ %5 = load i64* %4, align 8
+ %6 = sext i64 %5 to i128
+ %7 = mul nsw i128 %6, %2
+ %8 = add nsw i128 %7, %carry.02
+ %.tr = trunc i128 %8 to i64
+ %9 = and i64 %.tr, 9223372036854775807
+ store i64 %9, i64* %4, align 8
+ %10 = ashr i128 %8, 63
+ %11 = add nsw i64 %i.01, 1
+ %exitcond = icmp eq i64 %11, %arrsize
+ br i1 %exitcond, label %._crit_edge, label %3
+
+._crit_edge: ; preds = %3, %0
+ ret void
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
new file mode 100644
index 0000000..430b992
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
@@ -0,0 +1,64 @@
+; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The first cast should be sunk into block2, in order that the
+; instruction selector can form an efficient
+; i64 * i64 -> i128 multiplication.
+define i128 @sink(i64* %mem1, i64* %mem2) {
+; CHECK-LABEL: block1:
+; CHECK-NEXT: load
+block1:
+ %l1 = load i64* %mem1
+ %s1 = sext i64 %l1 to i128
+ br label %block2
+
+; CHECK-LABEL: block2:
+; CHECK-NEXT: sext
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block2:
+ %l2 = load i64* %mem2
+ %s2 = sext i64 %l2 to i128
+ %res = mul i128 %s1, %s2
+ ret i128 %res
+}
+
+; The first cast should be hoisted into block1, in order that the
+; instruction selector can form an extend-load.
+define i64 @hoist(i32* %mem1, i32* %mem2) {
+; CHECK-LABEL: block1:
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block1:
+ %l1 = load i32* %mem1
+ br label %block2
+
+; CHECK-LABEL: block2:
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block2:
+ %s1 = sext i32 %l1 to i64
+ %l2 = load i32* %mem2
+ %s2 = sext i32 %l2 to i64
+ %res = mul i64 %s1, %s2
+ ret i64 %res
+}
+
+; Make sure the cast sink logic and OptimizeExtUses don't end up in an infinite
+; loop.
+define i128 @use_ext_source() {
+block1:
+ %v1 = or i64 undef, undef
+ %v2 = zext i64 %v1 to i128
+ br i1 undef, label %block2, label %block3
+
+block2:
+ %v3 = add i64 %v1, 1
+ %v4 = zext i64 %v3 to i128
+ br label %block3
+
+block3:
+ %res = phi i128 [ %v2, %block1 ], [ %v4, %block2 ]
+ ret i128 %res
+}