Add a quick-and-dirty "loop aligner" pass. x86 uses it to align its loops to 16-byte boundaries.

llvm-svn: 47703
diff --git a/llvm/lib/Target/IA64/IA64AsmPrinter.cpp b/llvm/lib/Target/IA64/IA64AsmPrinter.cpp
index 264329d..829730d 100644
--- a/llvm/lib/Target/IA64/IA64AsmPrinter.cpp
+++ b/llvm/lib/Target/IA64/IA64AsmPrinter.cpp
@@ -149,7 +149,7 @@
        I != E; ++I) {
     // Print a label for the basic block if there are any predecessors.
     if (!I->pred_empty()) {
-      printBasicBlockLabel(I, true);
+      printBasicBlockLabel(I, true, true);
       O << '\n';
     }
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();