[Power9] Exploit D-Form VSX Scalar memory ops that target full VSX register set This patch corresponds to review: The newly added VSX D-Form (register + offset) memory ops target the upper half of the VSX register set. The existing ones target the lower half. In order to unify these and have the ability to target all the VSX registers using D-Form operations, this patch defines Pseudo-ops for the loads/stores which are expanded post-RA. The expansion then choses the correct opcode based on the register that was allocated for the operation. llvm-svn: 283212

commit: 6354d235559477ce0c290f85919cd49c77909231 [log] [tgz]
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> Tue Oct 04 11:25:52 2016 +0000
committer: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> Tue Oct 04 11:25:52 2016 +0000
tree: 500f82b489e3e5b551abf91338960a72b82aef6d
parent: ef445b7824152f0dba32823fd89b3a76878c8557 [diff]
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 750daec..2e0b935 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

@@ -999,13 +999,15 @@
                                        FrameIdx));
     NonRI = true;
   } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX))
+    unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf64 : PPC::STXSDX;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc))
                                        .addReg(SrcReg,
                                                getKillRegState(isKill)),
                                        FrameIdx));
     NonRI = true;
   } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSSPX))
+    unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf32 : PPC::STXSSPX;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc))
                                        .addReg(SrcReg,
                                                getKillRegState(isKill)),
                                        FrameIdx));
@@ -1128,12 +1130,14 @@
                                        FrameIdx));
     NonRI = true;
   } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
-                                       FrameIdx));
+    unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFLOADf64 : PPC::LXSDX;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc),
+                                               DestReg), FrameIdx));
     NonRI = true;
   } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSSPX), DestReg),
-                                       FrameIdx));
+    unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFLOADf32 : PPC::LXSSPX;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc),
+                                               DestReg), FrameIdx));
     NonRI = true;
   } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
     assert(Subtarget.isDarwin() &&
@@ -1882,6 +1886,41 @@
         .addReg(Reg);
     return true;
   }
+  case PPC::DFLOADf32:
+  case PPC::DFLOADf64:
+  case PPC::DFSTOREf32:
+  case PPC::DFSTOREf64: {
+    assert(Subtarget.hasP9Vector() &&
+           "Invalid D-Form Pseudo-ops on non-P9 target.");
+    unsigned UpperOpcode, LowerOpcode;
+    switch (MI.getOpcode()) {
+    case PPC::DFLOADf32:
+      UpperOpcode = PPC::LXSSP;
+      LowerOpcode = PPC::LFS;
+      break;
+    case PPC::DFLOADf64:
+      UpperOpcode = PPC::LXSD;
+      LowerOpcode = PPC::LFD;
+      break;
+    case PPC::DFSTOREf32:
+      UpperOpcode = PPC::STXSSP;
+      LowerOpcode = PPC::STFS;
+      break;
+    case PPC::DFSTOREf64:
+      UpperOpcode = PPC::STXSD;
+      LowerOpcode = PPC::STFD;
+      break;
+    }
+    unsigned TargetReg = MI.getOperand(0).getReg();
+    unsigned Opcode;
+    if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
+        (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
+      Opcode = LowerOpcode;
+    else
+      Opcode = UpperOpcode;
+    MI.setDesc(get(Opcode));
+    return true;
+  }
   }
   return false;
 }

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 9fd43f1..5206d57 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td

@@ -118,6 +118,7 @@
 
   // Load indexed instructions
   let mayLoad = 1 in {
+    let CodeSize = 3 in
     def LXSDX : XX1Form<31, 588,
                         (outs vsfrc:$XT), (ins memrr:$src),
                         "lxsdx $XT, $src", IIC_LdStLFD,
@@ -142,6 +143,7 @@
 
   // Store indexed instructions
   let mayStore = 1 in {
+    let CodeSize = 3 in
     def STXSDX : XX1Form<31, 716,
                         (outs), (ins vsfrc:$XT, memrr:$dst),
                         "stxsdx $XT, $dst", IIC_LdStSTFD,
@@ -1150,6 +1152,7 @@
 
   // VSX scalar loads introduced in ISA 2.07
   let mayLoad = 1 in {
+    let CodeSize = 3 in
     def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
                          "lxsspx $XT, $src", IIC_LdStLFD,
                          [(set f32:$XT, (load xoaddr:$src))]>;
@@ -1163,6 +1166,7 @@
 
   // VSX scalar stores introduced in ISA 2.07
   let mayStore = 1 in {
+    let CodeSize = 3 in
     def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
                           "stxsspx $XT, $dst", IIC_LdStSTFD,
                           [(store f32:$XT, xoaddr:$dst)]>;
@@ -2245,6 +2249,8 @@
   //===--------------------------------------------------------------------===//
   // Vector/Scalar Load/Store Instructions
 
+  // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+  // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
   let mayLoad = 1 in {
   // Load Vector
   def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
@@ -2285,6 +2291,8 @@
   def LXVWSX  : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
   } // mayLoad
 
+  // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+  // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
   let mayStore = 1 in {
   // Store Vector
   def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
@@ -2556,6 +2564,22 @@
             (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
   def : Pat<(f64 (PPCVexts f64:$A, 2)),
             (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
+  let isPseudo = 1 in {
+    def DFLOADf32  : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
+                            "#DFLOADf32",
+                            [(set f32:$XT, (load iaddr:$src))]>;
+    def DFLOADf64  : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
+                            "#DFLOADf64",
+                            [(set f64:$XT, (load iaddr:$src))]>;
+    def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+                            "#DFSTOREf32",
+                            [(store f32:$XT, iaddr:$dst)]>;
+    def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+                            "#DFSTOREf64",
+                            [(store f64:$XT, iaddr:$dst)]>;
+  }
+  def : Pat<(f64 (extloadf32 iaddr:$src)),
+            (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>;
 } // end HasP9Vector, AddedComplexity
 
 let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index facdb36..e492014 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp

@@ -78,6 +78,18 @@
   ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
   ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
   ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
+
+  // VSX
+  ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX;
+  ImmToIdxMap[PPC::DFLOADf64] = PPC::LXSDX;
+  ImmToIdxMap[PPC::DFSTOREf32] = PPC::STXSSPX;
+  ImmToIdxMap[PPC::DFSTOREf64] = PPC::STXSDX;
+  ImmToIdxMap[PPC::LXV] = PPC::LXVX;
+  ImmToIdxMap[PPC::LXSD] = PPC::LXSDX;
+  ImmToIdxMap[PPC::LXSSP] = PPC::LXSSPX;
+  ImmToIdxMap[PPC::STXV] = PPC::STXVX;
+  ImmToIdxMap[PPC::STXSD] = PPC::STXSDX;
+  ImmToIdxMap[PPC::STXSSP] = PPC::STXSSPX;
 }
 
 /// getPointerRegClass - Return the register class to use to hold pointers.

diff --git a/llvm/test/CodeGen/PowerPC/VSX-DForm-Scalars.ll b/llvm/test/CodeGen/PowerPC/VSX-DForm-Scalars.ll
new file mode 100644
index 0000000..94412d6
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/VSX-DForm-Scalars.ll

@@ -0,0 +1,73 @@
+; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs | FileCheck %s
+
+@gd = external local_unnamed_addr global [500 x double], align 8
+@gf = external local_unnamed_addr global [500 x float], align 4
+
+; Function Attrs: nounwind
+define double @_Z7getLXSDddddddddddddd(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m) local_unnamed_addr #0 {
+entry:
+  %0 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 10), align 8
+  %1 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 17), align 8
+  %2 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 87), align 8
+  %3 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 97), align 8
+  %4 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 77), align 8
+  store double %3, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 122), align 8
+  %add = fadd double %a, %b
+  %add1 = fadd double %add, %c
+  %add2 = fadd double %add1, %d
+  %add3 = fadd double %add2, %e
+  %add4 = fadd double %add3, %f
+  %add5 = fadd double %add4, %g
+  %add6 = fadd double %add5, %h
+  %add7 = fadd double %add6, %i
+  %add8 = fadd double %add7, %j
+  %add9 = fadd double %add8, %k
+  %add10 = fadd double %add9, %l
+  %add11 = fadd double %add10, %m
+  %add12 = fadd double %add11, %0
+  %add13 = fadd double %add12, %1
+  %add14 = fadd double %add13, %2
+  %add15 = fadd double %add14, %3
+  %add16 = fadd double %add15, %4
+  %call = tail call double @_Z7getLXSDddddddddddddd(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m)
+  %add17 = fadd double %add16, %call
+  ret double %add17
+; CHECK-LABEL: _Z7getLXSDddddddddddddd
+; CHECK: lxsd [[LD:[0-9]+]], 776(3)
+; CHECK: stxsd [[LD]], 976(3)
+}
+
+; Function Attrs: nounwind
+define float @_Z8getLXSSPfffffffffffff(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j, float %k, float %l, float %m) local_unnamed_addr #0 {
+entry:
+  %0 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 10), align 4
+  %1 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 17), align 4
+  %2 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 87), align 4
+  %3 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 97), align 4
+  %4 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 77), align 4
+  store float %3, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 122), align 4
+  %add = fadd float %a, %b
+  %add1 = fadd float %add, %c
+  %add2 = fadd float %add1, %d
+  %add3 = fadd float %add2, %e
+  %add4 = fadd float %add3, %f
+  %add5 = fadd float %add4, %g
+  %add6 = fadd float %add5, %h
+  %add7 = fadd float %add6, %i
+  %add8 = fadd float %add7, %j
+  %add9 = fadd float %add8, %k
+  %add10 = fadd float %add9, %l
+  %add11 = fadd float %add10, %m
+  %add12 = fadd float %add11, %0
+  %add13 = fadd float %add12, %1
+  %add14 = fadd float %add13, %2
+  %add15 = fadd float %add14, %3
+  %add16 = fadd float %add15, %4
+  %call = tail call float @_Z8getLXSSPfffffffffffff(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j, float %k, float %l, float %m)
+  %add17 = fadd float %add16, %call
+  ret float %add17
+; CHECK-LABEL: _Z8getLXSSPfffffffffffff
+; CHECK: lxssp [[LD:[0-9]+]], 388(3)
+; CHECK: stxssp [[LD]], 488(3)
+}

diff --git a/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll b/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
index af6ffc7..79da5cb 100644
--- a/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
+++ b/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll

@@ -1,6 +1,7 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
 ; RUN:  --check-prefix=CHECK-P7
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
 
 define signext i32 @f32toi32(float %a) {
 entry:

diff --git a/llvm/test/CodeGen/PowerPC/float-to-int.ll b/llvm/test/CodeGen/PowerPC/float-to-int.ll
index 012a021..80e6b75 100644
--- a/llvm/test/CodeGen/PowerPC/float-to-int.ll
+++ b/llvm/test/CodeGen/PowerPC/float-to-int.ll

@@ -1,6 +1,11 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=a2 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=g5
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -mattr=-direct-move | FileCheck -check-prefix=CHECK-P9 %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -19,6 +24,12 @@
 ; CHECK-VSX: stxsdx [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
+
+; CHECK-LABEL-P9: @foo
+; CHECK-P9: xscvdpsxds [[REG:[0-9]+]], 1
+; CHECK-P9: stfd [[REG]],
+; CHECK-P9: ld 3,
+; CHECK-P9: blr
 }
 
 define i64 @foo2(double %a) nounwind {
@@ -36,6 +47,12 @@
 ; CHECK-VSX: stxsdx [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
+
+; CHECK-LABEL-P9: @foo2
+; CHECK-P9: xscvdpsxds [[REG:[0-9]+]], 1
+; CHECK-P9: stfd [[REG]],
+; CHECK-P9: ld 3,
+; CHECK-P9: blr
 }
 
 define i64 @foo3(float %a) nounwind {
@@ -53,6 +70,12 @@
 ; CHECK-VSX: stxsdx [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
+
+; CHECK-LABEL-P9: @foo3
+; CHECK-P9: xscvdpuxds [[REG:[0-9]+]], 1
+; CHECK-P9: stfd [[REG]],
+; CHECK-P9: ld 3,
+; CHECK-P9: blr
 }
 
 define i64 @foo4(double %a) nounwind {
@@ -70,6 +93,12 @@
 ; CHECK-VSX: stxsdx [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
+
+; CHECK-LABEL-P9: @foo4
+; CHECK-P9: xscvdpuxds [[REG:[0-9]+]], 1
+; CHECK-P9: stfd [[REG]],
+; CHECK-P9: ld 3,
+; CHECK-P9: blr
 }
 
 define i32 @goo(float %a) nounwind {

diff --git a/llvm/test/CodeGen/PowerPC/i64-to-float.ll b/llvm/test/CodeGen/PowerPC/i64-to-float.ll
index d8d2822..e139579 100644
--- a/llvm/test/CodeGen/PowerPC/i64-to-float.ll
+++ b/llvm/test/CodeGen/PowerPC/i64-to-float.ll

@@ -1,5 +1,9 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=a2 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -mattr=-direct-move | FileCheck %s -check-prefix=CHECK-P9
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -19,6 +23,12 @@
 ; CHECK-VSX: lxsdx [[REG:[0-9]+]],
 ; CHECK-VSX: fcfids 1, [[REG]]
 ; CHECK-VSX: blr
+
+; CHECK-P9: @foo
+; CHECK-P9: std 3,
+; CHECK-P9: lfd [[REG:[0-9]+]],
+; CHECK-P9: xscvsxdsp 1, [[REG]]
+; CHECK-P9: blr
 }
 
 define double @goo(i64 %a) nounwind {
@@ -37,6 +47,12 @@
 ; CHECK-VSX: lxsdx [[REG:[0-9]+]],
 ; CHECK-VSX: xscvsxddp 1, [[REG]]
 ; CHECK-VSX: blr
+
+; CHECK-P9: @goo
+; CHECK-P9: std 3,
+; CHECK-P9: lfd [[REG:[0-9]+]],
+; CHECK-P9: xscvsxddp 1, [[REG]]
+; CHECK-P9: blr
 }
 
 define float @foou(i64 %a) nounwind {
@@ -55,6 +71,12 @@
 ; CHECK-VSX: lxsdx [[REG:[0-9]+]],
 ; CHECK-VSX: fcfidus 1, [[REG]]
 ; CHECK-VSX: blr
+
+; CHECK-P9: @foou
+; CHECK-P9: std 3,
+; CHECK-P9: lfd [[REG:[0-9]+]],
+; CHECK-P9: xscvuxdsp 1, [[REG]]
+; CHECK-P9: blr
 }
 
 define double @goou(i64 %a) nounwind {
@@ -73,5 +95,11 @@
 ; CHECK-VSX: lxsdx [[REG:[0-9]+]],
 ; CHECK-VSX: xscvuxddp 1, [[REG]]
 ; CHECK-VSX: blr
+
+; CHECK-P9: @goou
+; CHECK-P9: std 3,
+; CHECK-P9: lfd [[REG:[0-9]+]],
+; CHECK-P9: xscvuxddp 1, [[REG]]
+; CHECK-P9: blr
 }
 

diff --git a/llvm/test/CodeGen/PowerPC/mcm-12.ll b/llvm/test/CodeGen/PowerPC/mcm-12.ll
index 26255ee..c07dbd9 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-12.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-12.ll

@@ -1,5 +1,9 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium -mattr=-vsx < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium \
+; RUN:   -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium \
+; RUN:   -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O1 -code-model=medium < %s | \
+; RUN:   FileCheck -check-prefix=CHECK-P9 %s
 
 ; Test peephole optimization for medium code model (32-bit TOC offsets)
 ; for loading a value from the constant pool (TOC-relative).
@@ -24,3 +28,10 @@
 ; CHECK-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
 ; CHECK-VSX: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l
 ; CHECK-VSX: lxsdx {{[0-9]+}}, 0, [[REG1]]
+
+; CHECK-P9: [[VAR:[a-z0-9A-Z_.]+]]:
+; CHECK-P9: .quad 4562098671269285104
+; CHECK-P9-LABEL: test_double_const:
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; CHECK-P9: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l
+; CHECK-P9: lfd {{[0-9]+}}, 0([[REG1]])

diff --git a/llvm/test/CodeGen/PowerPC/mcm-4.ll b/llvm/test/CodeGen/PowerPC/mcm-4.ll
index 34fa4d1..180977d 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-4.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-4.ll

@@ -1,7 +1,15 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=MEDIUM %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-VSX %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=LARGE %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium \
+; RUN:   -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium \
+; RUN:   -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large \
+; RUN:   -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=LARGE %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large \
+; RUN:   -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -code-model=medium \
+; RUN:   -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-P9 %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -code-model=large \
+; RUN:   -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-P9 %s
 
 ; Test correct code generation for medium and large code model
 ; for loading a value from the constant pool (TOC-relative).
@@ -41,3 +49,17 @@
 ; LARGE-VSX: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha
 ; LARGE-VSX: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]])
 ; LARGE-VSX: lxsdx {{[0-9]+}}, 0, [[REG2]]
+
+; MEDIUM-P9: [[VAR:[a-z0-9A-Z_.]+]]:
+; MEDIUM-P9: .quad 4562098671269285104
+; MEDIUM-P9-LABEL: test_double_const:
+; MEDIUM-P9: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; MEDIUM-P9: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM-P9: lfd {{[0-9]+}}, 0([[REG2]])
+
+; LARGE-P9: [[VAR:[a-z0-9A-Z_.]+]]:
+; LARGE-P9: .quad 4562098671269285104
+; LARGE-P9-LABEL: test_double_const:
+; LARGE-P9: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE-P9: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]])
+; LARGE-P9: lfd {{[0-9]+}}, 0([[REG2]])

diff --git a/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll
index c6c7e2f..c3cccd5 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll

@@ -1,5 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=-vsx < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck %s
 
 ; Verify internal alignment of long double in a struct.  The double
 ; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain

diff --git a/llvm/test/CodeGen/PowerPC/ppc64le-aggregates.ll b/llvm/test/CodeGen/PowerPC/ppc64le-aggregates.ll
index 65e94a1..25b3e5d 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64le-aggregates.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64le-aggregates.ll

@@ -1,5 +1,9 @@
-; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mcpu=pwr8 -mattr=+altivec -mattr=-vsx | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mattr=+altivec -mattr=-vsx | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mcpu=pwr8 \
+; RUN:   -mattr=+altivec -mattr=-vsx | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mattr=+altivec \
+; RUN:   -mattr=-vsx | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mcpu=pwr9 \
+; RUN:   -mattr=-direct-move -mattr=+altivec | FileCheck %s
 
 ; Currently VSX support is disabled for this test because we generate lxsdx
 ; instead of lfd, and stxsdx instead of stfd.  That is a poor choice when we

diff --git a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
index c5bd49b..7f959ad 100644
--- a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll

@@ -1,4 +1,6 @@
 ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
+; RUN:   %s --check-prefix=CHECK-P9
 
 ; Verify peephole simplification of splats and swaps.  Bugpoint-reduced
 ; test from Eric Schweitz.
@@ -59,3 +61,9 @@
 ; CHECK: xxspltd
 ; CHECK: stxvd2x
 ; CHECK-NOT: xxswapd
+
+; CHECK-P9-LABEL: @aercalc_
+; CHECK-P9: lfs
+; CHECK-P9: xxspltd
+; CHECK-P9: stxvx
+; CHECK-P9-NOT: xxswapd

diff --git a/llvm/test/CodeGen/PowerPC/pr25157.ll b/llvm/test/CodeGen/PowerPC/pr25157.ll
index 7137d67..ee9a003 100644
--- a/llvm/test/CodeGen/PowerPC/pr25157.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25157.ll

@@ -1,4 +1,6 @@
 ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
+; RUN:   --check-prefix=CHECK-P9 %s
 
 ; Verify correct generation of an lxsspx rather than an invalid optimization
 ; to lxvdsx.  Bugpoint-reduced test from Eric Schweitz.
@@ -56,3 +58,5 @@
 
 ; CHECK-LABEL: @aercalc_
 ; CHECK: lxsspx
+; CHECK-P9-LABEL: @aercalc_
+; CHECK-P9: lfs

diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
index da6605e..d573441 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll

@@ -34,9 +34,9 @@
 
 ; CHECK-P9-LABEL: @bar0
 ; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
-; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
 ; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
+; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1
 ; CHECK-P9: stxvx [[REG5]]
 
 define void @bar1() {
@@ -57,8 +57,8 @@
 
 ; CHECK-P9-LABEL: @bar1
 ; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
-; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
 ; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK-P9: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
+; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]]
 ; CHECK-P9: stxvx [[REG5]]
 

diff --git a/llvm/test/CodeGen/PowerPC/vsx-spill.ll b/llvm/test/CodeGen/PowerPC/vsx-spill.ll
index 388029a..4dec0da 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-spill.ll

@@ -1,7 +1,14 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-REG %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | \
+; RUN:   FileCheck -check-prefix=CHECK-FISL %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-P9-REG %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -fast-isel -O0 < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-P9-FISL %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -21,6 +28,15 @@
 ; CHECK-FISL: stxsdx 1, 1, 0
 ; CHECK-FISL: blr
 
+; CHECK-P9-REG: @foo1
+; CHECK-P9-REG: xxlor [[R1:[0-9]+]], 1, 1
+; CHECK-P9-REG: xxlor 1, [[R1]], [[R1]]
+; CHECK-P9-REG: blr
+
+; CHECK-P9-FISL: @foo1
+; CHECK-P9-FISL: stfd 31, -8(1)
+; CHECK-P9-FISL: blr
+
 return:                                           ; preds = %entry
   ret double %a
 }
@@ -42,6 +58,17 @@
 ; CHECK-FISL: lxsdx [[R1]], [[R1]], 0
 ; CHECK-FISL: blr
 
+; CHECK-P9-REG: @foo2
+; CHECK-P9-REG: {{xxlor|xsadddp}} [[R1:[0-9]+]], 1, 1
+; CHECK-P9-REG: {{xxlor|xsadddp}} 1, [[R1]], [[R1]]
+; CHECK-P9-REG: blr
+
+; CHECK-P9-FISL: @foo2
+; CHECK-P9-FISL: xsadddp [[R1:[0-9]+]], 1, 1
+; CHECK-P9-FISL: stfd [[R1]], [[OFF:[0-9\-]+]](1)
+; CHECK-P9-FISL: lfd [[R1]], [[OFF]](1)
+; CHECK-P9-FISL: blr
+
 return:                                           ; preds = %entry
   ret double %b
 }
@@ -57,6 +84,15 @@
 ; CHECK: xsadddp 1, [[R1]], [[R1]]
 ; CHECK: blr
 
+; CHECK-P9-REG-LABEL: foo3
+; CHECK-P9-REG: stfd 1, [[OFF:[0-9\-]+]](1)
+; CHECK-P9-REG: lfd [[FPR:[0-9]+]], [[OFF]](1)
+; CHECK-P9-REG: xsadddp 1, [[FPR]], [[FPR]]
+
+; CHECK-P9-FISL-LABEL: foo3
+; CHECK-P9-FISL: stfd 1, [[OFF:[0-9\-]+]](1)
+; CHECK-P9-FISL: lfd [[FPR:[0-9]+]], [[OFF]](1)
+; CHECK-P9-FISL: xsadddp 1, [[FPR]], [[FPR]]
 return:                                           ; preds = %entry
   %b = fadd double %a, %a
   ret double %b

diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 5387888..09bf683 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll

@@ -22,10 +22,10 @@
 ; CHECK: xxpermdi 34, 0, 1, 1
 
 ; CHECK-P9-LABEL: testi0
-; CHECK-P9: lxsdx 0, 0, 4
-; CHECK-P9: lxvx 1, 0, 3
-; CHECK-P9: xxspltd 0, 0, 0
-; CHECK-P9: xxpermdi 34, 1, 0, 1
+; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
+; CHECK-P9: lxvx [[REG2:[0-9]+]], 0, 3
+; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
+; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1
 }
 
 define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
@@ -42,10 +42,10 @@
 ; CHECK: xxmrgld 34, 1, 0
 
 ; CHECK-P9-LABEL: testi1
-; CHECK-P9: lxsdx 0, 0, 4
-; CHECK-P9: lxvx 1, 0, 3
-; CHECK-P9: xxspltd 0, 0, 0
-; CHECK-P9: xxmrgld 34, 0, 1
+; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
+; CHECK-P9: lxvx [[REG2:[0-9]+]], 0, 3
+; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
+; CHECK-P9: xxmrgld 34, [[REG3]], [[REG2]]
 }
 
 define double @teste0(<2 x double>* %p1) {
@@ -57,7 +57,7 @@
 ; CHECK: lxvd2x 1, 0, 3
 
 ; CHECK-P9-LABEL: teste0
-; CHECK-P9: lxsdx 1, 0, 3
+; CHECK-P9: lfd 1, 0(3)
 }
 
 define double @teste1(<2 x double>* %p1) {
@@ -70,6 +70,5 @@
 ; CHECK: xxswapd 1, 0
 
 ; CHECK-P9-LABEL: teste1
-; CHECK-P9: li 4, 8
-; CHECK-P9: lxsdx 1, 3, 4
+; CHECK-P9: lfd 1, 8(3)
 }

diff --git a/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
index 65eb608..7da2ea2 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll

@@ -1,5 +1,9 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -mattr=-direct-move | FileCheck %s -check-prefix=CHECK-P9
 
 @d = common global double 0.000000e+00, align 8
 @f = common global float 0.000000e+00, align 4
@@ -121,6 +125,9 @@
 ; CHECK-LABEL: @dblToFloat
 ; CHECK: lxsdx [[REGLD5:[0-9]+]],
 ; CHECK: stxsspx [[REGLD5]],
+; CHECK-P9-LABEL: @dblToFloat
+; CHECK-P9: lfd [[REGLD5:[0-9]+]],
+; CHECK-P9: stfs [[REGLD5]],
 }
 
 ; Function Attrs: nounwind
@@ -134,4 +141,7 @@
 ; CHECK-LABEL: @floatToDbl
 ; CHECK: lxsspx [[REGLD5:[0-9]+]],
 ; CHECK: stxsdx [[REGLD5]],
+; CHECK-P9-LABEL: @floatToDbl
+; CHECK-P9: lfs [[REGLD5:[0-9]+]],
+; CHECK-P9: stfd [[REGLD5]],
 }
commit	6354d235559477ce0c290f85919cd49c77909231	[log] [tgz]
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	Tue Oct 04 11:25:52 2016 +0000
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	Tue Oct 04 11:25:52 2016 +0000
tree	500f82b489e3e5b551abf91338960a72b82aef6d
parent	ef445b7824152f0dba32823fd89b3a76878c8557 [diff]