[PowerPC] Add a peephole post RA to transform the inst that fed by add

If the arch is P8, we will select XFLOAD to load the floating point, and then, expand it to vsx and non-vsx X-form instruction post RA. This patch is trying to convert the X-form to D-form if it meets the requirement that one operand of the x-form inst is the special Zero register, and another operand fed by add inst. i.e.
y = add imm, reg
LFDX. 0, y
-->
LFD imm(reg)

Reviewers: Nemanjai
Differential Revision: https://reviews.llvm.org/D49007

llvm-svn: 340149
diff --git a/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll b/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
index e5fd9f5..54e378e 100644
--- a/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
+++ b/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
@@ -18,7 +18,7 @@
 entry:
   %0 = bitcast double %a to i64
   ret i64 %0
-; CHECK-P7: stfdx 1,
+; CHECK-P7: stfd 1,
 ; CHECK-P7: ld 3,
 ; CHECK: mffprd 3, 1
 }
@@ -39,7 +39,7 @@
   %0 = bitcast i64 %a to double
   ret double %0
 ; CHECK-P7: std 3,
-; CHECK-P7: lfdx 1,
+; CHECK-P7: lfd 1,
 ; CHECK: mtvsrd 1, 3
 }
 
@@ -58,7 +58,7 @@
 entry:
   %0 = bitcast double %a to i64
   ret i64 %0
-; CHECK-P7: stfdx 1,
+; CHECK-P7: stfd 1,
 ; CHECK-P7: ld 3,
 ; CHECK: mffprd 3, 1
 }
@@ -79,6 +79,6 @@
   %0 = bitcast i64 %a to double
   ret double %0
 ; CHECK-P7: std 3,
-; CHECK-P7: lfdx 1,
+; CHECK-P7: lfd 1,
 ; CHECK: mtvsrd 1, 3
 }
diff --git a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll
index c4666d3..fcec348 100644
--- a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll
+++ b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll
@@ -13,10 +13,8 @@
 ; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha
 ; CHECK-DAG: xxlxor 2, 2, 2
 ; CHECK-NOT: beq
-; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]]
-; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]]
-; CHECK-DAG: lfdx 1, 0, [[LD1BASE]]
-; CHECK-DAG: lfdx 3, 0, [[LD2BASE]]
+; CHECK-DAG: lfd 1, .LCPI0_0@toc@l([[LD1REG]])
+; CHECK-DAG: lfd 3, .LCPI0_1@toc@l([[LD2REG]])
 ; CHECK: .LBB[[LAB1]]
 ; CHECK: xsadddp 0, 1, 2
 ; CHECK: xsadddp 1, 0, 3
@@ -32,16 +30,14 @@
 ; CHECK-NOCOALESCE-NEXT:    beq 0, .LBB0_4
 ; CHECK-NOCOALESCE-NEXT:  .LBB0_3: # %entry
 ; CHECK-NOCOALESCE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; CHECK-NOCOALESCE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; CHECK-NOCOALESCE-NEXT:    lfdx 3, 0, 3
+; CHECK-NOCOALESCE-NEXT:    lfd 3, .LCPI0_1@toc@l(3)
 ; CHECK-NOCOALESCE-NEXT:  .LBB0_4: # %entry
 ; CHECK-NOCOALESCE-NEXT:    xsadddp 0, 1, 2
 ; CHECK-NOCOALESCE-NEXT:    xsadddp 1, 0, 3
 ; CHECK-NOCOALESCE-NEXT:    blr
 ; CHECK-NOCOALESCE-NEXT:  .LBB0_5: # %entry
 ; CHECK-NOCOALESCE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-NOCOALESCE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; CHECK-NOCOALESCE-NEXT:    lfdx 1, 0, 3
+; CHECK-NOCOALESCE-NEXT:    lfd 1, .LCPI0_0@toc@l(3)
 ; CHECK-NOCOALESCE-NEXT:    beq 0, .LBB0_2
 ; CHECK-NOCOALESCE-NEXT:  .LBB0_6: # %entry
 ; CHECK-NOCOALESCE-NEXT:    xxlxor 2, 2, 2
diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll b/llvm/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll
index 8a95de3..e3d5282 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel -mattr=+vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64VSX
+; RUN: llc < %s -O0 -fast-isel -mattr=+vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -ppc-late-peephole=false | FileCheck %s --check-prefix=ELF64VSX
 
 ;; The semantics of VSX stores for when R0 is used is different depending on
 ;; whether it is used as base or offset. If used as base, the effective
diff --git a/llvm/test/CodeGen/PowerPC/float-to-int.ll b/llvm/test/CodeGen/PowerPC/float-to-int.ll
index 6249c94..54f9cec 100644
--- a/llvm/test/CodeGen/PowerPC/float-to-int.ll
+++ b/llvm/test/CodeGen/PowerPC/float-to-int.ll
@@ -21,7 +21,7 @@
 
 ; CHECK-VSX: @foo
 ; CHECK-VSX: xscvdpsxds [[REG:[0-9]+]], 1
-; CHECK-VSX: stfdx [[REG]],
+; CHECK-VSX: stfd [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
 
@@ -44,7 +44,7 @@
 
 ; CHECK-VSX: @foo2
 ; CHECK-VSX: xscvdpsxds [[REG:[0-9]+]], 1
-; CHECK-VSX: stfdx [[REG]],
+; CHECK-VSX: stfd [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
 
@@ -67,7 +67,7 @@
 
 ; CHECK-VSX: @foo3
 ; CHECK-VSX: xscvdpuxds [[REG:[0-9]+]], 1
-; CHECK-VSX: stfdx [[REG]],
+; CHECK-VSX: stfd [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
 
@@ -90,7 +90,7 @@
 
 ; CHECK-VSX: @foo4
 ; CHECK-VSX: xscvdpuxds [[REG:[0-9]+]], 1
-; CHECK-VSX: stfdx [[REG]],
+; CHECK-VSX: stfd [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
 
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index bbca9c6..ea40e4e 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -165,16 +165,14 @@
 ; FMF-LABEL: fmul_fma_reassoc1:
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI6_0@toc@l
-; FMF-NEXT:    lfsx 0, 0, 3
+; FMF-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc1:
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; GLOBAL-NEXT:    addi 3, 3, .LCPI6_0@toc@l
-; GLOBAL-NEXT:    lfsx 0, 0, 3
+; GLOBAL-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    blr
   %mul = fmul float %x, 42.0
@@ -196,16 +194,14 @@
 ; FMF-LABEL: fmul_fma_reassoc2:
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI7_0@toc@l
-; FMF-NEXT:    lfsx 0, 0, 3
+; FMF-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc2:
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; GLOBAL-NEXT:    addi 3, 3, .LCPI7_0@toc@l
-; GLOBAL-NEXT:    lfsx 0, 0, 3
+; GLOBAL-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    blr
   %mul = fmul reassoc float %x, 42.0
@@ -227,16 +223,14 @@
 ; FMF-LABEL: fmul_fma_fast1:
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI8_0@toc@l
-; FMF-NEXT:    lfsx 0, 0, 3
+; FMF-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast1:
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
-; GLOBAL-NEXT:    addi 3, 3, .LCPI8_0@toc@l
-; GLOBAL-NEXT:    lfsx 0, 0, 3
+; GLOBAL-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    blr
   %mul = fmul float %x, 42.0
@@ -258,16 +252,14 @@
 ; FMF-LABEL: fmul_fma_fast2:
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI9_0@toc@l
-; FMF-NEXT:    lfsx 0, 0, 3
+; FMF-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast2:
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
-; GLOBAL-NEXT:    addi 3, 3, .LCPI9_0@toc@l
-; GLOBAL-NEXT:    lfsx 0, 0, 3
+; GLOBAL-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    blr
   %mul = fmul fast float %x, 42.0
@@ -294,8 +286,7 @@
 ; FMF-NEXT:  # %bb.1:
 ; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
 ; FMF-NEXT:    xsrsqrtesp 3, 1
-; FMF-NEXT:    addi 3, 3, .LCPI10_0@toc@l
-; FMF-NEXT:    lfsx 0, 0, 3
+; FMF-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
 ; FMF-NEXT:    xsmulsp 2, 1, 0
 ; FMF-NEXT:    xsmulsp 4, 3, 3
 ; FMF-NEXT:    xssubsp 2, 2, 1
@@ -317,8 +308,7 @@
 ; GLOBAL-NEXT:    fneg 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
 ; GLOBAL-NEXT:    fmr 4, 1
-; GLOBAL-NEXT:    addi 3, 3, .LCPI10_0@toc@l
-; GLOBAL-NEXT:    lfsx 3, 0, 3
+; GLOBAL-NEXT:    lfs 3, .LCPI10_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
 ; GLOBAL-NEXT:    xsmulsp 0, 2, 2
 ; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
@@ -352,8 +342,7 @@
 ; FMF-NEXT:    fneg 0, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
 ; FMF-NEXT:    fmr 4, 1
-; FMF-NEXT:    addi 3, 3, .LCPI11_0@toc@l
-; FMF-NEXT:    lfsx 3, 0, 3
+; FMF-NEXT:    lfs 3, .LCPI11_0@toc@l(3)
 ; FMF-NEXT:    xsmaddasp 4, 0, 3
 ; FMF-NEXT:    xsmulsp 0, 2, 2
 ; FMF-NEXT:    xsmaddasp 3, 4, 0
@@ -373,8 +362,7 @@
 ; GLOBAL-NEXT:    fneg 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
 ; GLOBAL-NEXT:    fmr 4, 1
-; GLOBAL-NEXT:    addi 3, 3, .LCPI11_0@toc@l
-; GLOBAL-NEXT:    lfsx 3, 0, 3
+; GLOBAL-NEXT:    lfs 3, .LCPI11_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
 ; GLOBAL-NEXT:    xsmulsp 0, 2, 2
 ; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
index 06efa89..54ceccd 100644
--- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -7,10 +7,8 @@
 define i128 @test_abs(ppc_fp128 %x) nounwind  {
 entry:
 ; PPC64-LABEL: test_abs:
-; PPC64-DAG: stfdx 2, 0, [[ADDR_HI:[0-9]+]]
-; PPC64-DAG: stfdx 1, 0, [[ADDR_LO:[0-9]+]]
-; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]]
-; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]]
+; PPC64-DAG: stfd 2, [[OFFSET_HI:-?[0-9]+]]([[SP:[0-9]+]])
+; PPC64-DAG: stfd 1, [[OFFSET_LO:-?[0-9]+]]([[SP]]) 
 ; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]])
 ; PPC64-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]])
 ; PPC64-DAG: rldicr [[FLIP_BIT:[0-9]+]], [[HI]], 0, 0
@@ -44,10 +42,8 @@
 define i128 @test_neg(ppc_fp128 %x) nounwind  {
 entry:
 ; PPC64-LABEL: test_neg:
-; PPC64-DAG: stfdx 2, 0, [[ADDR_HI:[0-9]+]]
-; PPC64-DAG: stfdx 1, 0, [[ADDR_LO:[0-9]+]]
-; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]]
-; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]]
+; PPC64-DAG: stfd 2, [[OFFSET_HI:-?[0-9]+]]([[SP:[0-9]+]])
+; PPC64-DAG: stfd 1, [[OFFSET_LO:-?[0-9]+]]([[SP]]) 
 ; PPC64-DAG: li [[FLIP_BIT:[0-9]+]], 1
 ; PPC64-DAG: sldi [[FLIP_BIT]], [[FLIP_BIT]], 63
 ; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]])
@@ -85,8 +81,7 @@
 define i128 @test_copysign(ppc_fp128 %x) nounwind  {
 entry:
 ; PPC64-LABEL: test_copysign:
-; PPC64-DAG: stfdx 1, 0, [[ADDR_REG:[0-9]+]]
-; PPC64-DAG: addi [[ADDR_REG]], 1, [[OFFSET:-?[0-9]+]]
+; PPC64-DAG: stfd 1, [[OFFSET:-?[0-9]+]](1)
 ; PPC64-DAG: li [[HI_TMP:[0-9]+]], 16399
 ; PPC64-DAG: li [[LO_TMP:[0-9]+]], 3019
 ; PPC64-NOT: BARRIER
diff --git a/llvm/test/CodeGen/PowerPC/i64-to-float.ll b/llvm/test/CodeGen/PowerPC/i64-to-float.ll
index b15f1f8..cf59657 100644
--- a/llvm/test/CodeGen/PowerPC/i64-to-float.ll
+++ b/llvm/test/CodeGen/PowerPC/i64-to-float.ll
@@ -20,7 +20,7 @@
 
 ; CHECK-VSX: @foo
 ; CHECK-VSX: std 3,
-; CHECK-VSX: lfdx [[REG:[0-9]+]],
+; CHECK-VSX: lfd [[REG:[0-9]+]],
 ; CHECK-VSX: fcfids 1, [[REG]]
 ; CHECK-VSX: blr
 
@@ -44,7 +44,7 @@
 
 ; CHECK-VSX: @goo
 ; CHECK-VSX: std 3,
-; CHECK-VSX: lfdx [[REG:[0-9]+]],
+; CHECK-VSX: lfd [[REG:[0-9]+]],
 ; CHECK-VSX: xscvsxddp 1, [[REG]]
 ; CHECK-VSX: blr
 
@@ -68,7 +68,7 @@
 
 ; CHECK-VSX: @foou
 ; CHECK-VSX: std 3,
-; CHECK-VSX: lfdx [[REG:[0-9]+]],
+; CHECK-VSX: lfd [[REG:[0-9]+]],
 ; CHECK-VSX: fcfidus 1, [[REG]]
 ; CHECK-VSX: blr
 
@@ -92,7 +92,7 @@
 
 ; CHECK-VSX: @goou
 ; CHECK-VSX: std 3,
-; CHECK-VSX: lfdx [[REG:[0-9]+]],
+; CHECK-VSX: lfd [[REG:[0-9]+]],
 ; CHECK-VSX: xscvuxddp 1, [[REG]]
 ; CHECK-VSX: blr
 
diff --git a/llvm/test/CodeGen/PowerPC/lxv-aligned-stack-slots.ll b/llvm/test/CodeGen/PowerPC/lxv-aligned-stack-slots.ll
index e8b65a3..9386800 100644
--- a/llvm/test/CodeGen/PowerPC/lxv-aligned-stack-slots.ll
+++ b/llvm/test/CodeGen/PowerPC/lxv-aligned-stack-slots.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -o - %s | FileCheck %s
+; RUN: llc -O3 -ppc-late-peephole=false -o - %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-n32:64"
 target triple = "powerpc64le-unknown-linux-gnu"
 
diff --git a/llvm/test/CodeGen/PowerPC/mcm-12.ll b/llvm/test/CodeGen/PowerPC/mcm-12.ll
index 8ba8a38..f64b62e 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-12.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-12.ll
@@ -26,8 +26,7 @@
 ; CHECK-VSX: .quad 4562098671269285104
 ; CHECK-VSX-LABEL: test_double_const:
 ; CHECK-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
-; CHECK-VSX: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l
-; CHECK-VSX: lfdx {{[0-9]+}}, 0, [[REG1]]
+; CHECK-VSX: lfd {{[0-9]+}}, [[VAR]]@toc@l({{[0-9]+}}) 
 
 ; CHECK-P9: [[VAR:[a-z0-9A-Z_.]+]]:
 ; CHECK-P9: .quad 4562098671269285104
diff --git a/llvm/test/CodeGen/PowerPC/mcm-4.ll b/llvm/test/CodeGen/PowerPC/mcm-4.ll
index c335fff..9173e1f 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-4.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-4.ll
@@ -33,8 +33,7 @@
 ; MEDIUM-VSX: .quad 4562098671269285104
 ; MEDIUM-VSX-LABEL: test_double_const:
 ; MEDIUM-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
-; MEDIUM-VSX: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
-; MEDIUM-VSX: lfdx {{[0-9]+}}, 0, [[REG2]]
+; MEDIUM-VSX: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
 
 ; LARGE: [[VAR:[a-z0-9A-Z_.]+]]:
 ; LARGE: .quad 4562098671269285104
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll
index ec11b54..8a5c573 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll
@@ -44,10 +44,8 @@
 ; CHECK-VSX-DAG: std 3, 48(1)
 ; CHECK-VSX-DAG: std 5, -16(1)
 ; CHECK-VSX-DAG: std 6, -8(1)
-; CHECK-VSX-DAG: addi [[REG1:[0-9]+]], 1, -16
-; CHECK-VSX-DAG: addi 3, 1, -8
-; CHECK-VSX: lfdx 1, 0, [[REG1]]
-; CHECK-VSX: lfdx 2, 0, 3
+; CHECK-VSX: lfd 1, -16(1)
+; CHECK-VSX: lfd 2, -8(1)
 
 ; FIXME-VSX: addi 4, 1, 48
 ; FIXME-VSX: lxsdx 1, 4, 3
diff --git a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
index 0143067..8e853cd 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@@ -42,8 +42,7 @@
   ret float %x
 }
 ; CHECK: @callee2
-; CHECK: addi [[TOCREG:[0-9]+]], 1, 136
-; CHECK: lfsx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]]
+; CHECK: lfs {{[0-9]+}}, 136(1) 
 ; CHECK: blr
 
 define void @caller2() {
@@ -53,8 +52,7 @@
   ret void
 }
 ; CHECK: @caller2
-; CHECK: addi [[TOCOFF:[0-9]+]], {{[0-9]+}}, 136
-; CHECK: stfsx {{[0-9]+}}, 0, [[TOCOFF]]
+; CHECK: stfs {{[0-9]+}}, 136({{[0-9]+}})
 ; CHECK: bl test2
 
 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
diff --git a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
index 02301ea..cb958d4 100644
--- a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
@@ -57,7 +57,7 @@
 }
 
 ; CHECK-LABEL: @aercalc_
-; CHECK: lfsx
+; CHECK: lfs
 ; CHECK: xxspltd
 ; CHECK: stxvd2x
 ; CHECK-NOT: xxswapd
diff --git a/llvm/test/CodeGen/PowerPC/pr25157.ll b/llvm/test/CodeGen/PowerPC/pr25157.ll
index 27f50b0..7d89d29 100644
--- a/llvm/test/CodeGen/PowerPC/pr25157.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25157.ll
@@ -57,6 +57,6 @@
 }
 
 ; CHECK-LABEL: @aercalc_
-; CHECK: lfsx
+; CHECK: lfs
 ; CHECK-P9-LABEL: @aercalc_
 ; CHECK-P9: lfs
diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll
index 1b5ddff..d8f44f1 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll
@@ -163,16 +163,14 @@
 
 ; P8LE-LABEL: s2v_test_f2:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addi r3, r3, 8
-; P8LE-NEXT:    lfdx f0, 0, r3
+; P8LE-NEXT:    lfd f0, 8(r3)
 ; P8LE-NEXT:    xxspltd vs0, vs0, 0
 ; P8LE-NEXT:    xxpermdi v2, v2, vs0, 1
 ; P8LE-NEXT:    blr
 
 ; P8BE-LABEL: s2v_test_f2:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addi r3, r3, 8
-; P8BE-NEXT:    lfdx f0, 0, r3
+; P8BE-NEXT:    lfd f0, 8(r3)
 ; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
 ; P8BE-NEXT:    blr
 entry:
@@ -238,16 +236,14 @@
 
 ; P8LE-LABEL: s2v_test_f4:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addi r3, r3, 8
-; P8LE-NEXT:    lfdx f0, 0, r3
+; P8LE-NEXT:    lfd f0, 8(r3)
 ; P8LE-NEXT:    xxspltd vs0, vs0, 0
 ; P8LE-NEXT:    xxpermdi v2, v2, vs0, 1
 ; P8LE-NEXT:    blr
 
 ; P8BE-LABEL: s2v_test_f4:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addi r3, r3, 8
-; P8BE-NEXT:    lfdx f0, 0, r3
+; P8BE-NEXT:    lfd f0, 8(r3)
 ; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
 ; P8BE-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll
index 8e0c482..461e0eb 100644
--- a/llvm/test/CodeGen/PowerPC/select_const.ll
+++ b/llvm/test/CodeGen/PowerPC/select_const.ll
@@ -992,13 +992,11 @@
 ; ALL-NEXT:    bc 12, 1, .LBB48_2
 ; ALL-NEXT:  # %bb.1:
 ; ALL-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
-; ALL-NEXT:    addi 3, 3, .LCPI48_0@toc@l
-; ALL-NEXT:    lfdx 1, 0, 3
+; ALL-NEXT:    lfd 1, .LCPI48_0@toc@l(3)
 ; ALL-NEXT:    blr
 ; ALL-NEXT:  .LBB48_2:
 ; ALL-NEXT:    addis 3, 2, .LCPI48_1@toc@ha
-; ALL-NEXT:    addi 3, 3, .LCPI48_1@toc@l
-; ALL-NEXT:    lfsx 1, 0, 3
+; ALL-NEXT:    lfs 1, .LCPI48_1@toc@l(3)
 ; ALL-NEXT:    blr
   %sel = select i1 %cond, double -4.0, double 23.3
   %bo = frem double %sel, 5.1
diff --git a/llvm/test/CodeGen/PowerPC/toc-float.ll b/llvm/test/CodeGen/PowerPC/toc-float.ll
index 814e069..4f5c341 100644
--- a/llvm/test/CodeGen/PowerPC/toc-float.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-float.ll
@@ -1,24 +1,29 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 <%s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 <%s | FileCheck -check-prefix=CHECK-P9 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 <%s | FileCheck -check-prefix=CHECK-P8 %s
 
 ; As the constant could be represented as float, a float is
 ; loaded from constant pool.
 define double @doubleConstant1() {
   ret double 1.400000e+01
-}
 
 ; CHECK-LABEL: doubleConstant1:
-; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P9: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P8: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+}
 
 ; As the constant couldn't be represented as float, a double is
 ; loaded from constant pool.
 define double @doubleConstant2() {
   ret double 2.408904e+01
-}
 
 ; CHECK-LABEL: doubleConstant2:
-; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P8: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+}
 
 @FArr = hidden local_unnamed_addr global [10 x float] zeroinitializer, align 4
 
@@ -26,19 +31,24 @@
   %1 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* @FArr, i64 0, i64 3), align 4
   %2 = fadd float %1, 0x400B333340000000
   ret float %2
-}
 
 ; CHECK-LABEL: floatConstantArray 
-; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]]
-; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]])
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]]
+; CHECK-P9: lfs {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]])
+; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P8: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; CHECK-P8: lfs {{[0-9]+}}, 12([[REG2]])
+}
 
 define float @floatConstant() {
   ret float 0x400470A3E0000000
-}
 
 ; CHECK-LABEL: floatConstant:
-; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P9: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P8: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+}
 
 ; llvm put the hidden globals into the TOC table.
 ; TODO - do some analysis and decide which globals could be put into TOC.
@@ -48,11 +58,14 @@
   %1 = load double, double* getelementptr inbounds ([200 x double], [200 x double]* @d, i64 0, i64 3), align 8
   %2 = fadd double %1, 6.880000e+00
   ret double %2
-}
 
 ; CHECK-LABEL: doubleConstantArray
-; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]]
-; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]])
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]]
+; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]])
+; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P8: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; CHECK-P8: lfd {{[0-9]+}}, 24([[REG2]])
+}
 
 @arr = hidden local_unnamed_addr global [20000 x double] zeroinitializer, align 8
 
@@ -60,12 +73,34 @@
   %1 = load double, double* getelementptr inbounds ([20000 x double], [20000 x double]* @arr, i64 0, i64 4096), align 8
   %2 = fadd double %1, 6.880000e+00
   ret double %2
+
+; Access an element with an offset that doesn't fit in the displacement field of LFD. 
+; CHECK-LABEL: doubleLargeConstantArray
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P9: li [[REG2:[0-9]+]], 0 
+; CHECK-P9: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l
+; CHECK-P9: ori [[REG4:[0-9]+]], [[REG2]], 32768 
+; CHECK-P9: lfdx {{[0-9]+}}, [[REG3]], [[REG4]] 
+; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P8: li [[REG2:[0-9]+]], 0 
+; CHECK-P8: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l
+; CHECK-P8: ori [[REG4:[0-9]+]], [[REG2]], 32768 
+; CHECK-P8: lfdx {{[0-9]+}}, [[REG3]], [[REG4]] 
 }
 
-; access element that out of range
-; CHECK-LABEL: doubleLargeConstantArray
-; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK: li [[REG2:[0-9]+]], 0 
-; CHECK: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l
-; CHECK: ori [[REG4:[0-9]+]], [[REG2]], 32768 
-; CHECK: lfdx {{[0-9]+}}, [[REG3]], [[REG4]] 
+@vec_arr = global [10 x <4 x i32>] zeroinitializer, align 16
+
+define <4 x i32> @vectorArray() #0 {
+entry:
+  %0 = load <4 x i32>, <4 x i32>* getelementptr inbounds ([10 x <4 x i32>], [10 x <4 x i32>]* @vec_arr, i64 0, i64 2), align 16
+  ret <4 x i32> %0
+
+; CHECK-LABEL: vectorArray
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P9: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; CHECK-P9: lxv {{[0-9]+}}, 32([[REG2]])
+; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P8: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; CHECK-P8: addi [[REG3:[0-9]+]], [[REG2]], 32
+; CHECK-P8: lvx {{[0-9]+}}, 0, [[REG3]]
+}
diff --git a/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
index ec6646d..4d45e6e 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
@@ -124,7 +124,7 @@
   ret void
 ; CHECK-LABEL: @dblToFloat
 ; CHECK: lfdx [[REGLD5:[0-9]+]],
-; CHECK: stfsx [[REGLD5]],
+; CHECK: stfs [[REGLD5]],
 ; CHECK-P9-LABEL: @dblToFloat
 ; CHECK-P9: lfd [[REGLD5:[0-9]+]],
 ; CHECK-P9: stfs [[REGLD5]],
@@ -140,7 +140,7 @@
   ret void
 ; CHECK-LABEL: @floatToDbl
 ; CHECK: lfsx [[REGLD5:[0-9]+]],
-; CHECK: stfdx [[REGLD5]],
+; CHECK: stfd [[REGLD5]],
 ; CHECK-P9-LABEL: @floatToDbl
 ; CHECK-P9: lfs [[REGLD5:[0-9]+]],
 ; CHECK-P9: stfd [[REGLD5]],