When saving/restoring CR in the prolog/epilog of a function with a large
stack frame, the generated code was using the same register for both the
copy of CR and the address of the save slot. Oops.
This is fixed here for Darwin, sort of, by reserving R2 for this case.
A better way would be to do the store before the decrement of SP,
which is safe on Darwin due to the red zone.
SVR4 probably has the same problem, but I don't know how to fix it;
there is no red zone and R2 is already used for something else.
I'm going to leave it to someone interested in that target.
Better still would be to rewrite the CR-saving code completely;
spilling each CR subregister individually is horrible code.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96015 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index af7d812..3db623a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -421,22 +421,30 @@
FrameIdx));
return true;
} else {
- // FIXME: We use R0 here, because it isn't available for RA. We need to
- // store the CR in the low 4-bits of the saved value. First, issue a MFCR
- // to save all of the CRBits.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0));
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+
+ // We need to store the CR in the low 4-bits of the saved value. First,
+ // issue a MFCR to save all of the CRBits.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg));
// If the saved register wasn't CR0, shift the bits left so that they are
// in CR0's slot.
if (SrcReg != PPC::CR0) {
unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
- // rlwinm r0, r0, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31));
+ // rlwinm scratch, scratch, ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(ShiftBits)
+ .addImm(0).addImm(31));
}
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(PPC::R0,
+ .addReg(ScratchReg,
getKillRegState(isKill)),
FrameIdx));
}
@@ -540,20 +548,28 @@
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (RC == PPC::CRRCRegisterClass) {
- // FIXME: We use R0 here, because it isn't available for RA.
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0),
- FrameIdx));
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
}
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
} else if (RC == PPC::CRBITRCRegisterClass) {
unsigned Reg = 0;
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
new file mode 100644
index 0000000..b73382e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s
+; ModuleID = 'hh.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+target triple = "powerpc-apple-darwin9.6"
+; This formerly used R0 for both the stack address and CR.
+
+define void @foo() nounwind {
+entry:
+;CHECK: mfcr r2
+;CHECK: rlwinm r2, r2, 8, 0, 31
+;CHECK: lis r0, 1
+;CHECK: ori r0, r0, 34540
+;CHECK: stwx r2, r1, r0
+ %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]
+ call void @bar(i8* %x1) nounwind
+ call void asm sideeffect "", "~{cr2}"() nounwind
+ br label %return
+
+return: ; preds = %entry
+;CHECK: lis r0, 1
+;CHECK: ori r0, r0, 34540
+;CHECK: lwzx r2, r1, r0
+;CHECK: rlwinm r2, r2, 24, 0, 31
+;CHECK: mtcrf 32, r2
+ ret void
+}
+
+declare void @bar(i8*)