Add a quick and dirty "loop aligner pass". x86 uses it to align its loops to 16-byte boundaries.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47703 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter.cpp
index 7a01a87..9cdae34 100644
--- a/lib/CodeGen/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter.cpp
@@ -39,7 +39,8 @@
 char AsmPrinter::ID = 0;
 AsmPrinter::AsmPrinter(std::ostream &o, TargetMachine &tm,
                        const TargetAsmInfo *T)
-  : MachineFunctionPass((intptr_t)&ID), FunctionNumber(0), O(o), TM(tm), TAI(T)
+  : MachineFunctionPass((intptr_t)&ID), FunctionNumber(0), O(o), TM(tm), TAI(T),
+    IsInTextSection(false)
 {}
 
 std::string AsmPrinter::getSectionForFunction(const Function &F) const {
@@ -69,6 +70,8 @@
 
   if (!CurrentSection.empty())
     O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n';
+
+  IsInTextSection = true;
 }
 
 /// SwitchToDataSection - Switch to the specified data section of the executable
@@ -93,6 +96,8 @@
   
   if (!CurrentSection.empty())
     O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n';
+
+  IsInTextSection = false;
 }
 
 
@@ -344,7 +349,7 @@
       O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
         << '_' << uid << "_set_" << MBB->getNumber();
     } else {
-      printBasicBlockLabel(MBB, false, false);
+      printBasicBlockLabel(MBB, false, false, false);
       // If the arch uses custom Jump Table directives, don't calc relative to
       // JT
       if (!HadJTEntryDirective) 
@@ -352,7 +357,7 @@
           << getFunctionNumber() << '_' << uid;
     }
   } else {
-    printBasicBlockLabel(MBB, false, false);
+    printBasicBlockLabel(MBB, false, false, false);
   }
 }
 
@@ -679,8 +684,7 @@
 //     Align = std::max(Align, ForcedAlignBits);
 //
 void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
-                               unsigned ForcedAlignBits, bool UseFillExpr,
-                               unsigned FillValue) const {
+                               unsigned ForcedAlignBits) const {
   if (GV && GV->getAlignment())
     NumBits = Log2_32(GV->getAlignment());
   NumBits = std::max(NumBits, ForcedAlignBits);
@@ -688,6 +692,9 @@
   if (NumBits == 0) return;   // No need to emit alignment.
   if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits;
   O << TAI->getAlignDirective() << NumBits;
+
+  unsigned FillValue = TAI->getTextAlignFillValue();
+  bool UseFillExpr = IsInTextSection && FillValue;
   if (UseFillExpr) O << ",0x" << std::hex << FillValue << std::dec;
   O << "\n";
 }
@@ -1252,7 +1259,7 @@
 
           if (Modifier[0]=='l')  // labels are target independent
             printBasicBlockLabel(MI->getOperand(OpNo).getMBB(), 
-                                 false, false);
+                                 false, false, false);
           else {
             AsmPrinter *AP = const_cast<AsmPrinter*>(this);
             if ((OpFlags & 7) == 4 /*ADDR MODE*/) {
@@ -1318,8 +1325,15 @@
 /// printBasicBlockLabel - This method prints the label for the specified
 /// MachineBasicBlock
 void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB,
+                                      bool printAlign, 
                                       bool printColon,
                                       bool printComment) const {
+  if (printAlign) {
+    unsigned Align = MBB->getAlignment();
+    if (Align)
+      EmitAlignment(Log2_32(Align));
+  }
+
   O << TAI->getPrivateGlobalPrefix() << "BB" << getFunctionNumber() << "_"
     << MBB->getNumber();
   if (printColon)
@@ -1338,7 +1352,7 @@
   
   O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
     << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
-  printBasicBlockLabel(MBB, false, false);
+  printBasicBlockLabel(MBB, false, false, false);
   O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() 
     << '_' << uid << '\n';
 }
@@ -1351,7 +1365,7 @@
   O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
     << getFunctionNumber() << '_' << uid << '_' << uid2
     << "_set_" << MBB->getNumber() << ',';
-  printBasicBlockLabel(MBB, false, false);
+  printBasicBlockLabel(MBB, false, false, false);
   O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() 
     << '_' << uid << '_' << uid2 << '\n';
 }
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 2d7836f..d9874b5 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -44,7 +44,7 @@
             cl::desc("Max number of predecessors to consider tail merging"),
             cl::init(100), cl::Hidden);
 
-  struct BranchFolder : public MachineFunctionPass {
+  struct VISIBILITY_HIDDEN BranchFolder : public MachineFunctionPass {
     static char ID;
     explicit BranchFolder(bool defaultEnableTailMerge) : 
         MachineFunctionPass((intptr_t)&ID) {
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 7d7f33e..a77ccb7 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -56,7 +56,7 @@
 STATISTIC(NumDupBBs,       "Number of duplicated blocks");
 
 namespace {
-  class IfConverter : public MachineFunctionPass {
+  class VISIBILITY_HIDDEN IfConverter : public MachineFunctionPass {
     enum IfcvtKind {
       ICNotClassfied,  // BB data valid, but not classified.
       ICSimpleFalse,   // Same as ICSimple, but on the false path.
diff --git a/lib/CodeGen/LoopAligner.cpp b/lib/CodeGen/LoopAligner.cpp
new file mode 100644
index 0000000..a40bb50
--- /dev/null
+++ b/lib/CodeGen/LoopAligner.cpp
@@ -0,0 +1,65 @@
+//===-- LoopAligner.cpp - Loop aligner pass. ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pass that align loop headers to target specific
+// alignment boundary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopalign"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+  class LoopAligner : public MachineFunctionPass {
+    const TargetLowering *TLI;
+
+  public:
+    static char ID;
+    LoopAligner() : MachineFunctionPass((intptr_t)&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const { return "Loop aligner"; }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+
+  char LoopAligner::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createLoopAlignerPass() { return new LoopAligner(); }
+
+bool LoopAligner::runOnMachineFunction(MachineFunction &MF) {
+  const MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
+
+  if (MLI->begin() == MLI->end())
+    return false;  // No loops.
+
+  unsigned Align = MF.getTarget().getTargetLowering()->getPrefLoopAlignment();
+  if (!Align)
+    return false;  // Don't care about loop alignment.
+
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = I;
+    if (MLI->isLoopHeader(MBB))
+      MBB->setAlignment(Align);
+  }
+
+  return true;
+}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index dc6a618..af91a2f 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -166,6 +166,7 @@
   if (LBB) OS << LBB->getName() << ": ";
   OS << (const void*)this
      << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber();
+  if (Alignment) OS << ", Alignment " << Alignment;
   if (isLandingPad()) OS << ", EH LANDING PAD";
   OS << ":\n";
 
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ba5a34b..ff5289e 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -206,6 +206,8 @@
   JumpBufSize = 0;
   JumpBufAlignment = 0;
   IfCvtBlockSizeLimit = 2;
+  IfCvtDupBlockSizeLimit = 0;
+  PrefLoopAlignment = 0;
 
   InitLibcallNames(LibcallRoutineNames);
   InitCmpLibcallCCs(CmpLibcallCCs);
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 31055b2..ed70771 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -248,7 +248,7 @@
        I != E; ++I) {
     // Print a label for the basic block.
     if (I != MF.begin()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
@@ -710,13 +710,13 @@
         << '_' << JTI << '_' << MO2.getImm()
         << "_set_" << MBB->getNumber();
     else if (TM.getRelocationModel() == Reloc::PIC_) {
-      printBasicBlockLabel(MBB, false, false);
+      printBasicBlockLabel(MBB, false, false, false);
       // If the arch uses custom Jump Table directives, don't calc relative to JT
       if (!TAI->getJumpTableDirective()) 
         O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
           << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm();
     } else
-      printBasicBlockLabel(MBB, false, false);
+      printBasicBlockLabel(MBB, false, false, false);
     if (i != e-1)
       O << '\n';
   }
diff --git a/lib/Target/Alpha/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AlphaAsmPrinter.cpp
index a46a64c..42e4b78 100644
--- a/lib/Target/Alpha/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AlphaAsmPrinter.cpp
@@ -171,7 +171,7 @@
   for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
        I != E; ++I) {
     if (I != MF.begin()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index d96ec9d..f627896 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -460,7 +460,7 @@
        I != E; ++I) {
     // Print a label for the basic block.
     if (I != MF.begin()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
diff --git a/lib/Target/IA64/IA64AsmPrinter.cpp b/lib/Target/IA64/IA64AsmPrinter.cpp
index 264329d..829730d 100644
--- a/lib/Target/IA64/IA64AsmPrinter.cpp
+++ b/lib/Target/IA64/IA64AsmPrinter.cpp
@@ -149,7 +149,7 @@
        I != E; ++I) {
     // Print a label for the basic block if there are any predecessors.
     if (!I->pred_empty()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index dc3242c..6c0c908 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -297,7 +297,7 @@
 
     // Print a label for the basic block.
     if (I != MF.begin()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
 
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 115e490..20b0d2a 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -604,7 +604,7 @@
        I != E; ++I) {
     // Print a label for the basic block.
     if (I != MF.begin()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
@@ -838,7 +838,7 @@
        I != E; ++I) {
     // Print a label for the basic block.
     if (I != MF.begin()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index d5b2020..01bd092 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -116,7 +116,7 @@
        I != E; ++I) {
     // Print a label for the basic block.
     if (I != MF.begin()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
diff --git a/lib/Target/X86/X86ATTAsmPrinter.cpp b/lib/Target/X86/X86ATTAsmPrinter.cpp
index 304e0f7..b9770fe 100644
--- a/lib/Target/X86/X86ATTAsmPrinter.cpp
+++ b/lib/Target/X86/X86ATTAsmPrinter.cpp
@@ -101,36 +101,25 @@
   switch (F->getLinkage()) {
   default: assert(0 && "Unknown linkage type!");
   case Function::InternalLinkage:  // Symbols default to internal.
-    if (Subtarget->isTargetDarwin())
-      // FIXME: This should be parameterized somewhere.
-      EmitAlignment(4, F, 0, true, 0x90);
-    else
-      EmitAlignment(4, F);
+    EmitAlignment(4, F);
     break;
   case Function::DLLExportLinkage:
     DLLExportedFns.insert(Mang->makeNameProper(F->getName(), ""));
     //FALLS THROUGH
   case Function::ExternalLinkage:
-    if (Subtarget->isTargetDarwin())
-      // FIXME: This should be parameterized somewhere.
-      EmitAlignment(4, F, 0, true, 0x90);
-    else
-      EmitAlignment(4, F);
+    EmitAlignment(4, F);
     O << "\t.globl\t" << CurrentFnName << "\n";    
     break;
   case Function::LinkOnceLinkage:
   case Function::WeakLinkage:
+    EmitAlignment(4, F);
     if (Subtarget->isTargetDarwin()) {
-      // FIXME: This should be parameterized somewhere.
-      EmitAlignment(4, F, 0, true, 0x90);
       O << "\t.globl\t" << CurrentFnName << "\n";
       O << TAI->getWeakDefDirective() << CurrentFnName << "\n";
     } else if (Subtarget->isTargetCygMing()) {
-      EmitAlignment(4, F);
       O << "\t.globl\t" << CurrentFnName << "\n";
       O << "\t.linkonce discard\n";
     } else {
-      EmitAlignment(4, F);
       O << "\t.weak\t" << CurrentFnName << "\n";
     }
     break;
@@ -180,7 +169,7 @@
        I != E; ++I) {
     // Print a label for the basic block.
     if (!I->pred_empty()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
@@ -515,7 +504,7 @@
     
   O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
     << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
-  printBasicBlockLabel(MBB, false, false);
+  printBasicBlockLabel(MBB, false, false, false);
   if (Subtarget->isPICStyleRIPRel())
     O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() 
       << '_' << uid << '\n';
@@ -543,12 +532,12 @@
       O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
         << '_' << uid << "_set_" << MBB->getNumber();
     } else if (Subtarget->isPICStyleGOT()) {
-      printBasicBlockLabel(MBB, false, false);
+      printBasicBlockLabel(MBB, false, false, false);
       O << "@GOTOFF";
     } else
       assert(0 && "Don't know how to print MBB label for this PIC mode");
   } else
-    printBasicBlockLabel(MBB, false, false);
+    printBasicBlockLabel(MBB, false, false, false);
 }
 
 bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 50014b3..8acf779 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -714,6 +714,7 @@
   maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
   maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
   allowUnalignedMemoryAccesses = true; // x86 supports it!
+  setPrefLoopAlignment(16);
 }
 
 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
diff --git a/lib/Target/X86/X86IntelAsmPrinter.cpp b/lib/Target/X86/X86IntelAsmPrinter.cpp
index b753297..6c46b45 100644
--- a/lib/Target/X86/X86IntelAsmPrinter.cpp
+++ b/lib/Target/X86/X86IntelAsmPrinter.cpp
@@ -78,7 +78,7 @@
        I != E; ++I) {
     // Print a label for the basic block if there are any predecessors.
     if (!I->pred_empty()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
@@ -242,7 +242,7 @@
   
   O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
     << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
-  printBasicBlockLabel(MBB, false, false);
+  printBasicBlockLabel(MBB, false, false, false);
   O << '-' << "\"L" << getFunctionNumber() << "$pb\"'\n";
 }
 
diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp
index 42e63ec..acc13ba 100644
--- a/lib/Target/X86/X86TargetAsmInfo.cpp
+++ b/lib/Target/X86/X86TargetAsmInfo.cpp
@@ -47,6 +47,7 @@
   switch (Subtarget->TargetType) {
   case X86Subtarget::isDarwin:
     AlignmentIsInBytes = false;
+    TextAlignFillValue = 0x90;
     GlobalPrefix = "_";
     if (!Subtarget->is64Bit())
       Data64bitsDirective = 0;       // we can't emit a 64-bit unit
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 850eb38..ad2775a 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -164,6 +164,13 @@
   return true;  // -print-machineinstr should print after this.
 }
 
+bool X86TargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
+  if (Fast) return false;
+
+  PM.add(createLoopAlignerPass());
+  return true;
+}
+
 bool X86TargetMachine::addAssemblyEmitter(FunctionPassManager &PM, bool Fast, 
                                           std::ostream &Out) {
   PM.add(createX86CodePrinterPass(Out, *this));
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index e9148b5..61e4451 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -63,6 +63,7 @@
   // Set up the pass pipeline.
   virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);  
   virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast);
+  virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
   virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast, 
                                   std::ostream &Out);
   virtual bool addCodeEmitter(FunctionPassManager &PM, bool Fast,