Add a quick pass to optimize sign / zero extension instructions. For targets where the pre-extension values are available in the subreg of the result of the extension, replace the uses of the pre-extension value with the result + extract_subreg.
For now, this pass is fairly conservative. It only performs the replacement when both the pre- and post-extension values are used in the block. It will miss cases where the post-extension values are live, but not used.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93278 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 99f8c34..2203f8c 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -170,6 +170,10 @@
/// instructions.
FunctionPass *createMachineSinkingPass();
+ /// createOptimizeExtsPass - This pass performs sign / zero extension
+ /// optimization by increasing uses of extended values.
+ FunctionPass *createOptimizeExtsPass();
+
/// createStackSlotColoringPass - This pass performs stack slot coloring.
FunctionPass *createStackSlotColoringPass(bool);
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 6172fcf..e6df1bf 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -149,16 +149,15 @@
return false;
}
- /// isCoalescableInstr - Return true if the instruction is "coalescable". That
- /// is, it's like a copy where it's legal for the source to overlap the
- /// destination. e.g. X86::MOVSX64rr32.
- virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- if (isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- isCopy = true;
- return true;
- }
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+ /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+ /// true, then it's expected the pre-extension value is available as a subreg
+ /// of the result register. This also returns the sub-register index in
+ /// SubIdx.
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
return false;
}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 8757c9f..84eb71c 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -62,6 +62,10 @@
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+#if 1
+static cl::opt<bool> XX("xx", cl::Hidden);  // NOTE(review): not referenced in the visible hunks; looks like a leftover debug switch. Confirm before landing.
+#endif
+
// Enable or disable FastISel. Both options are needed, because
// FastISel is enabled by default with -fast, and we wish to be
// able to enable or disable fast-isel independently from -O0.
@@ -324,6 +328,7 @@
/* allowDoubleDefs= */ true);
if (OptLevel != CodeGenOpt::None) {
+ PM.add(createOptimizeExtsPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
if (!DisableMachineSink)
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
new file mode 100644
index 0000000..02fc82e
--- /dev/null
+++ b/lib/CodeGen/OptimizeExts.cpp
@@ -0,0 +1,149 @@
+//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ext-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));  // When set, uses in blocks dominated by the extension's block are also candidates.
+
+STATISTIC(NumReuse, "Number of extension results reused");  // Incremented once per rewritten use.
+
+namespace {
+ class OptimizeExts : public MachineFunctionPass {  // Sign / zero extension optimization pass.
+ const TargetMachine *TM;  // Set from MF.getTarget() at the start of each run.
+ const TargetInstrInfo *TII;  // TM's instruction info; queried for coalescable extensions.
+ MachineRegisterInfo *MRI;  // Register info of the function being processed.
+ MachineDominatorTree *DT; // Machine dominator tree; consulted for the aggressive mode.
+
+ public:
+ static char ID; // Pass identification; its address is passed to the base class.
+ OptimizeExts() : MachineFunctionPass(&ID) {}
+ // Processes one machine function; returns true if any use was rewritten.
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ // Declares that the CFG is preserved and that MachineDominatorTree is required and preserved.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ };
+}
+
+char OptimizeExts::ID = 0;  // Its address is what the constructor hands to MachineFunctionPass.
+static RegisterPass<OptimizeExts>
+X("opt-exts", "Optimize sign / zero extensions");  // Registers the pass with argument "opt-exts".
+// Factory declared in llvm/CodeGen/Passes.h; returns a newly allocated OptimizeExts.
+FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
+
+/// runOnMachineFunction - For every coalescable extension whose source and
+/// result are virtual registers, rewrite other uses of the pre-extension
+/// value to an EXTRACT_SUBREG of the result. Returns true if a use changed.
+bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
+  TM = &MF.getTarget();
+  TII = TM->getInstrInfo();
+  MRI = &MF.getRegInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
+
+  bool Changed = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
+    // Instructions of MBB seen so far; only same-block uses consult this.
+    SmallPtrSet<MachineInstr*, 8> LocalMIs;
+    for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end();
+         MII != ME; ++MII) {
+      MachineInstr *MI = &*MII;
+      LocalMIs.insert(MI);
+
+      unsigned SrcReg, DstReg, SubIdx;
+      if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+        continue;
+      if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+          TargetRegisterInfo::isPhysicalRegister(SrcReg))
+        continue;
+
+      // MI itself uses SrcReg; bail out unless there is at least one more use.
+      MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
+      if (++UI == MRI->use_end())
+        continue;
+
+      // Blocks that use the extension result, and those where a PHI uses it.
+      // A PHI reads DstReg on an edge; uses in its block are left alone.
+      SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs, PHIBBs;
+      for (UI = MRI->use_begin(DstReg); UI != MRI->use_end(); ++UI) {
+        ReachedBBs.insert(UI->getParent());
+        if (UI->getOpcode() == TargetInstrInfo::PHI)
+          PHIBBs.insert(UI->getParent());
+      }
+
+      bool ExtendLife = true;
+      SmallVector<MachineOperand*, 8> Uses;         // Always rewritten.
+      SmallVector<MachineOperand*, 8> ExtendedUses; // Only if ExtendLife.
+      for (UI = MRI->use_begin(SrcReg); UI != MRI->use_end(); ++UI) {
+        MachineOperand &UseMO = UI.getOperand();
+        MachineInstr *UseMI = &*UI;
+        if (UseMI == MI)
+          continue;
+        if (UseMI->getOpcode() == TargetInstrInfo::PHI) {
+          // Can't put an EXTRACT_SUBREG ahead of a PHI; SrcReg stays live.
+          ExtendLife = false;
+          continue;
+        }
+        MachineBasicBlock *UseMBB = UseMI->getParent();
+        if (UseMBB == MBB) {
+          // Local uses that come after the extension.
+          if (!LocalMIs.count(UseMI))
+            Uses.push_back(&UseMO);
+        } else if (ReachedBBs.count(UseMBB)) {
+          // Non-local uses where the result of extension is used.
+          Uses.push_back(&UseMO);
+        } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+          // Candidate for extending the extension result's live range.
+          ExtendedUses.push_back(&UseMO);
+        } else {
+          // Both would be live out of MBB; don't extend the result's range.
+          ExtendLife = false;
+          break;
+        }
+      }
+
+      if (ExtendLife && !ExtendedUses.empty())
+        Uses.append(ExtendedUses.begin(), ExtendedUses.end());
+
+      // Now replace all uses.
+      const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+      for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+        MachineOperand *UseMO = Uses[i];
+        MachineInstr *UseMI = UseMO->getParent();
+        MachineBasicBlock *UseMBB = UseMI->getParent();
+        if (PHIBBs.count(UseMBB))
+          continue;
+        unsigned NewVR = MRI->createVirtualRegister(RC);
+        BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+                TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR)
+          .addReg(DstReg).addImm(SubIdx);
+        UseMO->setReg(NewVR);
+        ++NumReuse;
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5ef3354..a1bacbf 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -713,9 +713,9 @@
}
bool
-X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default: break;
case X86::MOVSX16rr8:
@@ -733,10 +733,8 @@
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
// Be conservative.
return false;
- isCopy = false;
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
- DstSubIdx = 0;
switch (MI.getOpcode()) {
default:
llvm_unreachable(0);
@@ -747,22 +745,23 @@
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
case X86::MOVZX64rr8:
- SrcSubIdx = 1;
+ SubIdx = 1;
break;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
case X86::MOVZX64rr16:
- SrcSubIdx = 3;
+ SubIdx = 3;
break;
case X86::MOVSX64rr32:
case X86::MOVZX64rr32:
- SrcSubIdx = 4;
+ SubIdx = 4;
break;
}
+ return true;
}
}
- return isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+ return false;
}
/// isFrameOperand - Return true and the FrameIndex if the specified
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 6ae7808..0ab85f4 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -448,13 +448,15 @@
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
- /// isCoalescableInstr - Return true if the instruction is "coalescable". That
- /// is, it's like a copy where it's legal for the source to overlap the
- /// destination. e.g. X86::MOVSX64rr32.
- virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+ /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+ /// true, then it's expected the pre-extension value is available as a subreg
+ /// of the result register. This also returns the sub-register index in
+ /// SubIdx.
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
/// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
index 67e14ff..9361a6f 100644
--- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 57
; PR2568
@g_3 = external global i16 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll
new file mode 100644
index 0000000..b2b9f81
--- /dev/null
+++ b/test/CodeGen/X86/sext-subreg.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; rdar://7529457
+
+define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
+; CHECK: t:
+; CHECK: movslq %e{{.*}}, %rax
+; CHECK: movq %rax
+; CHECK: movl %eax
+ %C = add i64 %A, %B
+ %D = trunc i64 %C to i32
+ volatile store i32 %D, i32* %P
+ %E = shl i64 %C, 32
+ %F = ashr i64 %E, 32
+ volatile store i64 %F, i64 *%P2
+ volatile store i32 %D, i32* %P
+ ret i64 undef
+}
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index d762392..be9f3af 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6
+; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 9
type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1