[PPC64LE] More vector swap optimization TLC

This makes one substantive change and a few stylistic changes to the VSX swap optimization pass.

The substantive change is to permit LXSDX and LXSSPX instructions to participate in swap optimization computations. The previous change to insert a swap following a SUBREG_TO_REG widening operation makes this almost trivial.

I experimented with also permitting STXSDX and STXSSPX instructions. This can be done using similar techniques: we could insert a swap prior to a narrowing COPY operation, and then permit these stores to participate. I prototyped this, but discovered that the pattern of a narrowing COPY followed by an STXSDX does not occur in any of our test-suite code. So instead, I added commentary indicating that this could be done.

Other TLC:
- I changed SH_COPYSCALAR to SH_COPYWIDEN to more clearly indicate the direction of the copy.
- I factored the insertion of swap instructions into a separate function.

Finally, I added a new test case to check that the scalar-to-vector loads are working properly with swap optimization.

llvm-svn: 242838

commit: 2be8054b49c42a6bd7d1e94f2b9ca24d92ae7311 [log] [tgz]
author: Bill Schmidt <wschmidt@linux.vnet.ibm.com> Tue Jul 21 21:40:17 2015 +0000
committer: Bill Schmidt <wschmidt@linux.vnet.ibm.com> Tue Jul 21 21:40:17 2015 +0000
tree: b7a050ac1060d2a5c5a44d011033bf7a7a2e2bef
parent: c1fbb3540a22c64b0afcfb3c2e99171ae7b13414 [diff] [blame]
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
new file mode 100644
index 0000000..365aeee
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll

@@ -0,0 +1,44 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
+
+; These tests verify that VSX swap optimization works when loading a scalar
+; into a vector register.
+
+
+@x = global <2 x double> <double 9.970000e+01, double -1.032220e+02>, align 16
+@z = global <2 x double> <double 2.332000e+01, double 3.111111e+01>, align 16
+@y = global double 1.780000e+00, align 8
+
+define void @bar0() { ; test: insert scalar @y into element 0 of vector @x, store result to @z
+entry:
+  %0 = load <2 x double>, <2 x double>* @x, align 16 ; load the full <2 x double> global @x
+  %1 = load double, double* @y, align 8 ; load the scalar double global @y
+  %vecins = insertelement <2 x double> %0, double %1, i32 0 ; replace element 0 of %0 with the scalar %1
+  store <2 x double> %vecins, <2 x double>* @z, align 16 ; write the updated vector to global @z
+  ret void
+}
+
+; CHECK-LABEL: @bar0
+; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
+; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
+; CHECK: stxvd2x [[REG5]]
+
+define void @bar1() { ; test: insert scalar @y into element 1 of vector @x, store result to @z
+entry:
+  %0 = load <2 x double>, <2 x double>* @x, align 16 ; load the full <2 x double> global @x
+  %1 = load double, double* @y, align 8 ; load the scalar double global @y
+  %vecins = insertelement <2 x double> %0, double %1, i32 1 ; replace element 1 of %0 with the scalar %1
+  store <2 x double> %vecins, <2 x double>* @z, align 16 ; write the updated vector to global @z
+  ret void
+}
+
+; CHECK-LABEL: @bar1
+; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
+; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
+; CHECK: stxvd2x [[REG5]]
+
commit	2be8054b49c42a6bd7d1e94f2b9ca24d92ae7311	[log] [tgz]
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	Tue Jul 21 21:40:17 2015 +0000
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	Tue Jul 21 21:40:17 2015 +0000
tree	b7a050ac1060d2a5c5a44d011033bf7a7a2e2bef
parent	c1fbb3540a22c64b0afcfb3c2e99171ae7b13414 [diff] [blame]