Add a flag to align all basic blocks in the function.
When debugging performance regressions we often ask ourselves if the regression
that we see is due to poor isel/sched/ra or due to some micro-architectural
problem. When comparing two code sequences one good way to rule out front-end
bottlenecks (and other issues) is to force code alignment. This patch adds
a flag that forces the alignment of all of the basic blocks in the program.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179353 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index cd948e2..774df81 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -52,6 +53,11 @@
STATISTIC(UncondBranchTakenFreq,
"Potential frequency of taking unconditional branches");
+static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
+ cl::desc("Force the alignment of all "
+ "blocks in the function."),
+ cl::init(0), cl::Hidden);
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
@@ -1083,6 +1089,14 @@
TLI = F.getTarget().getTargetLowering();
assert(BlockToChain.empty());
+ if (AlignAllBlock) {
+ // Align all of the blocks in the function to a specific alignment.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ FI->setAlignment(AlignAllBlock);
+ return true;
+ }
+
buildCFGChains(F);
BlockToChain.clear();
diff --git a/test/CodeGen/X86/code_placement_align_all.ll b/test/CodeGen/X86/code_placement_align_all.ll
new file mode 100644
index 0000000..2a36a3a
--- /dev/null
+++ b/test/CodeGen/X86/code_placement_align_all.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 -align-all-blocks=16 < %s | FileCheck %s
+
+;CHECK: foo
+;CHECK: .align 16, 0x90
+;CHECK: .align 16, 0x90
+;CHECK: .align 16, 0x90
+;CHECK: ret
+define i32 @foo(i32 %t, i32 %l) nounwind readnone ssp uwtable {
+ %1 = icmp eq i32 %t, 0
+ br i1 %1, label %4, label %2
+
+; <label>:2 ; preds = %0
+ %3 = add nsw i32 %t, 2
+ ret i32 %3
+
+; <label>:4 ; preds = %0
+ %5 = icmp eq i32 %l, 0
+ %. = select i1 %5, i32 0, i32 5
+ ret i32 %.
+}
+
+