[PPC] Heuristic to choose between an X-Form VSX ld/st vs an X-Form FP ld/st.
The VSX versions have the advantage of a full 64-register target whereas the FP
ones have the advantage of lower latency and higher throughput. So what we’re
after is using the faster instructions in low register pressure situations and
using the larger register file in high register pressure situations.
The heuristic chooses between the following 7 pairs of instructions:
PPC::LXSSPX vs PPC::LFSX
PPC::LXSDX vs PPC::LFDX
PPC::STXSSPX vs PPC::STFSX
PPC::STXSDX vs PPC::STFDX
PPC::LXSIWAX vs PPC::LFIWAX
PPC::LXSIWZX vs PPC::LFIWZX
PPC::STXSIWX vs PPC::STFIWX
Differential Revision: https://reviews.llvm.org/D38486
llvm-svn: 318651
diff --git a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
new file mode 100644
index 0000000..e38c5be
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P9
+
+@a = external local_unnamed_addr global <4 x i32>, align 16
+@pb = external local_unnamed_addr global float*, align 8
+
+define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {
+; CHECK-P8-LABEL: testExpandPostRAPseudo:
+; CHECK-P8: lxsiwax 34, 0, 3
+; CHECK-P8-NEXT: xxspltw 34, 34, 1
+; CHECK-P8-NEXT: stvx 2, 0, 4
+; CHECK-P8: #APP
+; CHECK-P8-NEXT: #Clobber Rigisters
+; CHECK-P8-NEXT: #NO_APP
+; CHECK-P8-NEXT: lis 4, 1024
+; CHECK-P8-NEXT: lfiwax 0, 0, 3
+; CHECK-P8: stfsx 0, 3, 4
+; CHECK-P8-NEXT: blr
+
+; CHECK-P9-LABEL: testExpandPostRAPseudo:
+; CHECK-P9: lxvwsx 0, 0, 3
+; CHECK-P9: stxvx 0, 0, 4
+; CHECK-P9: #APP
+; CHECK-P9-NEXT: #Clobber Rigisters
+; CHECK-P9-NEXT: #NO_APP
+; CHECK-P9-NEXT: lis 4, 1024
+; CHECK-P9-NEXT: lfiwax 0, 0, 3
+; CHECK-P9: stfsx 0, 3, 4
+; CHECK-P9-NEXT: blr
+
+entry:
+ %0 = load i32, i32* %ptr, align 4
+ %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
+ %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+ store <4 x i32> %splat.splat, <4 x i32>* @a, align 16
+ tail call void asm sideeffect "#Clobber Rigisters", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
+ %1 = load i32, i32* %ptr, align 4
+ %conv = sitofp i32 %1 to float
+ %2 = load float*, float** @pb, align 8
+ %add.ptr = getelementptr inbounds float, float* %2, i64 16777216
+ store float %conv, float* %add.ptr, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index d71460b..fd1f458 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1485,10 +1485,10 @@
; P9BE: xvcvspsxws v2, [[REG1]]
; P9LE: [[REG1:[vs0-9]+]], 0, r3
; P9LE: xvcvspsxws v2, [[REG1]]
-; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]]
; P8BE: xxspltw v2, vs[[REG2]], 1
-; P8LE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
; P8LE: xscvdpsxws f[[REG2:[vs0-9]+]], [[REG1]]
; P8LE: xxspltw v2, vs[[REG2]], 1
}
@@ -1880,11 +1880,11 @@
; P9LE: xscvdpsxws
; P9LE: xxspltw
; P9LE: blr
-; P8BE: lxsdx
+; P8BE: lfdx
; P8BE: xscvdpsxws
; P8BE: xxspltw
; P8BE: blr
-; P8LE: lxsdx
+; P8LE: lfdx
; P8LE: xscvdpsxws
; P8LE: xxspltw
; P8LE: blr
@@ -2645,10 +2645,10 @@
; P9BE: xvcvspuxws v2, [[REG1]]
; P9LE: [[REG1:[vs0-9]+]], 0, r3
; P9LE: xvcvspuxws v2, [[REG1]]
-; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]]
; P8BE: xxspltw v2, vs[[REG2]], 1
-; P8LE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
; P8LE: xscvdpuxws f[[REG2:[vs0-9]+]], [[REG1]]
; P8LE: xxspltw v2, vs[[REG2]], 1
}
@@ -3040,11 +3040,11 @@
; P9LE: xscvdpuxws
; P9LE: xxspltw
; P9LE: blr
-; P8BE: lxsdx
+; P8BE: lfdx
; P8BE: xscvdpuxws
; P8BE: xxspltw
; P8BE: blr
-; P8LE: lxsdx
+; P8LE: lfdx
; P8LE: xscvdpuxws
; P8LE: xxspltw
; P8LE: blr
@@ -3508,13 +3508,13 @@
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpsxds v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
-; P8BE: lxsspx
+; P8BE: lfsx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
-; P8LE: lxsspx
+; P8LE: lfsx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@@ -3546,13 +3546,13 @@
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpsxds v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
-; P8BE: lxsspx
+; P8BE: lfsx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
-; P8LE: lxsspx
+; P8LE: lfsx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@@ -3591,13 +3591,13 @@
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@@ -3636,13 +3636,13 @@
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@@ -3693,11 +3693,11 @@
; P9LE-NEXT: xscvdpsxds
; P9LE-NEXT: xxspltd v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE-NEXT: xscvdpsxds
; P8BE-NEXT: xxspltd v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE-NEXT: xscvdpsxds
; P8LE-NEXT: xxspltd v2
; P8LE-NEXT: blr
@@ -4412,13 +4412,13 @@
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpuxds v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
-; P8BE: lxsspx
+; P8BE: lfsx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
-; P8LE: lxsspx
+; P8LE: lfsx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@@ -4450,13 +4450,13 @@
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpuxds v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
-; P8BE: lxsspx
+; P8BE: lfsx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
-; P8LE: lxsspx
+; P8LE: lfsx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@@ -4495,13 +4495,13 @@
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@@ -4540,13 +4540,13 @@
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@@ -4597,11 +4597,11 @@
; P9LE-NEXT: xscvdpuxds
; P9LE-NEXT: xxspltd v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE-NEXT: xscvdpuxds
; P8BE-NEXT: xxspltd v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE-NEXT: xscvdpuxds
; P8LE-NEXT: xxspltd v2
; P8LE-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/direct-move-profit.ll b/llvm/test/CodeGen/PowerPC/direct-move-profit.ll
index 423f0ff..7205d11 100644
--- a/llvm/test/CodeGen/PowerPC/direct-move-profit.ll
+++ b/llvm/test/CodeGen/PowerPC/direct-move-profit.ll
@@ -17,7 +17,7 @@
; CHECK-NOT: mtvsrwa
; CHECK-NOT: mtfprwa
-; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK: lfiwax [[REG:[0-9]+]], {{.*}}
; CHECK-NOT: mtvsrwa
; CHECK-NOT: mtfprwa
; CHECK: xscvsxdsp {{.*}}, [[REG]]
@@ -40,7 +40,7 @@
; CHECK-NOT: mtvsrwa
; CHECK-NOT: mtfprwa
-; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK: lfiwax [[REG:[0-9]+]], {{.*}}
; CHECK-NOT: mtvsrwa
; CHECK-NOT: mtfprwa
; CHECK: xscvsxdsp {{.*}}, [[REG]]
diff --git a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
index 783833d..0e50b3a 100644
--- a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
@@ -1034,10 +1034,10 @@
define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) {
entry:
; CHECK-LABEL: insertVarF
-; CHECK: stxsspx 1,
+; CHECK: stfsx 1,
; CHECK: lxv
; CHECK-BE-LABEL: insertVarF
-; CHECK-BE: stxsspx 1,
+; CHECK-BE: stfsx 1,
; CHECK-BE: lxv
%vecins = insertelement <4 x float> %a, float %f, i32 %el
ret <4 x float> %vecins
diff --git a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
index 3a42540..0143067 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@@ -43,7 +43,7 @@
}
; CHECK: @callee2
; CHECK: addi [[TOCREG:[0-9]+]], 1, 136
-; CHECK: lxsspx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]]
+; CHECK: lfsx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]]
; CHECK: blr
define void @caller2() {
@@ -54,7 +54,7 @@
}
; CHECK: @caller2
; CHECK: addi [[TOCOFF:[0-9]+]], {{[0-9]+}}, 136
-; CHECK: stxsspx {{[0-9]+}}, 0, [[TOCOFF]]
+; CHECK: stfsx {{[0-9]+}}, 0, [[TOCOFF]]
; CHECK: bl test2
declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
diff --git a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
index aacd64e..02301ea 100644
--- a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
@@ -57,7 +57,7 @@
}
; CHECK-LABEL: @aercalc_
-; CHECK: lxsspx
+; CHECK: lfsx
; CHECK: xxspltd
; CHECK: stxvd2x
; CHECK-NOT: xxswapd
diff --git a/llvm/test/CodeGen/PowerPC/pr25157.ll b/llvm/test/CodeGen/PowerPC/pr25157.ll
index ee9a003..27f50b0 100644
--- a/llvm/test/CodeGen/PowerPC/pr25157.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25157.ll
@@ -57,6 +57,6 @@
}
; CHECK-LABEL: @aercalc_
-; CHECK: lxsspx
+; CHECK: lfsx
; CHECK-P9-LABEL: @aercalc_
; CHECK-P9: lfs
diff --git a/llvm/test/CodeGen/PowerPC/select-addrRegRegOnly.ll b/llvm/test/CodeGen/PowerPC/select-addrRegRegOnly.ll
index f880d1f..6be31ea 100644
--- a/llvm/test/CodeGen/PowerPC/select-addrRegRegOnly.ll
+++ b/llvm/test/CodeGen/PowerPC/select-addrRegRegOnly.ll
@@ -6,7 +6,7 @@
; CHECK-LABEL: testSingleAccess:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: addi 3, 3, 8
-; CHECK-NEXT: lxsiwax 0, 0, 3
+; CHECK-NEXT: lfiwax 0, 0, 3
; CHECK-NEXT: xscvsxdsp 1, 0
; CHECK-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll
index 2954812..fd86480 100644
--- a/llvm/test/CodeGen/PowerPC/select_const.ll
+++ b/llvm/test/CodeGen/PowerPC/select_const.ll
@@ -780,7 +780,7 @@
; ALL-NEXT: .LBB38_2:
; ALL-NEXT: addis 3, 2, .LCPI38_1@toc@ha
; ALL-NEXT: addi 3, 3, .LCPI38_1@toc@l
-; ALL-NEXT: lxsspx 1, 0, 3
+; ALL-NEXT: lfsx 1, 0, 3
; ALL-NEXT: blr
%sel = select i1 %cond, double -4.0, double 23.3
%bo = frem double %sel, 5.1
diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 98fe3a8..0a4db39 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -17,8 +17,8 @@
; CHECK-LABEL: testi0
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxsdx 1, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 1, 1, 0
+; CHECK-DAG: xxspltd 1, 1, 0
+; CHECK-DAG: xxswapd 0, 0
; CHECK: xxpermdi 34, 0, 1, 1
; CHECK-P9-LABEL: testi0
@@ -37,8 +37,8 @@
; CHECK-LABEL: testi1
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxsdx 1, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 1, 1, 0
+; CHECK-DAG: xxspltd 1, 1, 0
+; CHECK-DAG: xxswapd 0, 0
; CHECK: xxmrgld 34, 1, 0
; CHECK-P9-LABEL: testi1
diff --git a/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
index 7da2ea2..d8ea0dc 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
@@ -20,7 +20,7 @@
ret void
; CHECK-LABEL: @dblToInt
; CHECK: xscvdpsxws [[REGCONV1:[0-9]+]],
-; CHECK: stxsiwx [[REGCONV1]],
+; CHECK: stfiwx [[REGCONV1]],
}
; Function Attrs: nounwind
@@ -33,7 +33,7 @@
ret void
; CHECK-LABEL: @fltToInt
; CHECK: xscvdpsxws [[REGCONV2:[0-9]+]],
-; CHECK: stxsiwx [[REGCONV2]],
+; CHECK: stfiwx [[REGCONV2]],
}
; Function Attrs: nounwind
@@ -45,7 +45,7 @@
store volatile double %conv, double* %dd, align 8
ret void
; CHECK-LABEL: @intToDbl
-; CHECK: lxsiwax [[REGLD1:[0-9]+]],
+; CHECK: lfiwax [[REGLD1:[0-9]+]],
; CHECK: xscvsxddp {{[0-9]+}}, [[REGLD1]]
}
@@ -58,7 +58,7 @@
store volatile float %conv, float* %ff, align 4
ret void
; CHECK-LABEL: @intToFlt
-; CHECK: lxsiwax [[REGLD2:[0-9]+]],
+; CHECK: lfiwax [[REGLD2:[0-9]+]],
; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]]
}
@@ -72,7 +72,7 @@
ret void
; CHECK-LABEL: @dblToUInt
; CHECK: xscvdpuxws [[REGCONV3:[0-9]+]],
-; CHECK: stxsiwx [[REGCONV3]],
+; CHECK: stfiwx [[REGCONV3]],
}
; Function Attrs: nounwind
@@ -85,7 +85,7 @@
ret void
; CHECK-LABEL: @fltToUInt
; CHECK: xscvdpuxws [[REGCONV4:[0-9]+]],
-; CHECK: stxsiwx [[REGCONV4]],
+; CHECK: stfiwx [[REGCONV4]],
}
; Function Attrs: nounwind
@@ -97,7 +97,7 @@
store volatile double %conv, double* %dd, align 8
ret void
; CHECK-LABEL: @uIntToDbl
-; CHECK: lxsiwzx [[REGLD3:[0-9]+]],
+; CHECK: lfiwzx [[REGLD3:[0-9]+]],
; CHECK: xscvuxddp {{[0-9]+}}, [[REGLD3]]
}
@@ -110,7 +110,7 @@
store volatile float %conv, float* %ff, align 4
ret void
; CHECK-LABEL: @uIntToFlt
-; CHECK: lxsiwzx [[REGLD4:[0-9]+]],
+; CHECK: lfiwzx [[REGLD4:[0-9]+]],
; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]]
}
@@ -124,7 +124,7 @@
ret void
; CHECK-LABEL: @dblToFloat
; CHECK: lxsdx [[REGLD5:[0-9]+]],
-; CHECK: stxsspx [[REGLD5]],
+; CHECK: stfsx [[REGLD5]],
; CHECK-P9-LABEL: @dblToFloat
; CHECK-P9: lfd [[REGLD5:[0-9]+]],
; CHECK-P9: stfs [[REGLD5]],
@@ -139,7 +139,7 @@
store volatile double %conv, double* %dd, align 8
ret void
; CHECK-LABEL: @floatToDbl
-; CHECK: lxsspx [[REGLD5:[0-9]+]],
+; CHECK: lfsx [[REGLD5:[0-9]+]],
; CHECK: stxsdx [[REGLD5]],
; CHECK-P9-LABEL: @floatToDbl
; CHECK-P9: lfs [[REGLD5:[0-9]+]],