Selectively invert ICMP operands for better address optimization
Results in lower code size and more loads folded into cmp instructions.
BUG=none
R=eholk@chromium.org, jpp@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/2124973005 .
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 3a07b52..2d67f37 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -65,9 +65,10 @@
// Using non-anonymous struct so that array_lengthof works.
const struct InstIcmpAttributes_ {
const char *DisplayString;
+ InstIcmp::ICond Reverse;
} InstIcmpAttributes[] = {
-#define X(tag, str) \
- { str } \
+#define X(tag, reverse, str) \
+ { str, InstIcmp::ICond::reverse } \
,
ICEINSTICMP_TABLE
#undef X
@@ -1085,6 +1086,10 @@
InstBreakpoint::InstBreakpoint(Cfg *Func)
: InstHighLevel(Func, Inst::Breakpoint, 0, nullptr) {}
+void InstIcmp::reverseConditionAndOperands() {
+ Condition = InstIcmpAttributes[Condition].Reverse;
+ std::swap(Srcs[0], Srcs[1]);
+}
bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) {
const auto *SrcVar = llvm::dyn_cast<const Variable>(Source);
if (!SrcVar)
diff --git a/src/IceInst.def b/src/IceInst.def
index 6bd2efb..6410d52 100644
--- a/src/IceInst.def
+++ b/src/IceInst.def
@@ -31,74 +31,74 @@
// representable in the destination format. This standard does not
// specify which of the input NaNs will provide the payload."
-#define ICEINSTARITHMETIC_TABLE \
- /* enum value, printable string, commutative */ \
- X(Add, "add", 1) \
- X(Fadd, "fadd", 1) \
- X(Sub, "sub", 0) \
- X(Fsub, "fsub", 0) \
- X(Mul, "mul", 1) \
- X(Fmul, "fmul", 1) \
- X(Udiv, "udiv", 0) \
- X(Sdiv, "sdiv", 0) \
- X(Fdiv, "fdiv", 0) \
- X(Urem, "urem", 0) \
- X(Srem, "srem", 0) \
- X(Frem, "frem", 0) \
- X(Shl, "shl", 0) \
- X(Lshr, "lshr", 0) \
- X(Ashr, "ashr", 0) \
- X(And, "and", 1) \
- X(Or, "or", 1) \
+#define ICEINSTARITHMETIC_TABLE \
+ /* enum value, printable string, commutative */ \
+ X(Add, "add", 1) \
+ X(Fadd, "fadd", 1) \
+ X(Sub, "sub", 0) \
+ X(Fsub, "fsub", 0) \
+ X(Mul, "mul", 1) \
+ X(Fmul, "fmul", 1) \
+ X(Udiv, "udiv", 0) \
+ X(Sdiv, "sdiv", 0) \
+ X(Fdiv, "fdiv", 0) \
+ X(Urem, "urem", 0) \
+ X(Srem, "srem", 0) \
+ X(Frem, "frem", 0) \
+ X(Shl, "shl", 0) \
+ X(Lshr, "lshr", 0) \
+ X(Ashr, "ashr", 0) \
+ X(And, "and", 1) \
+ X(Or, "or", 1) \
X(Xor, "xor", 1)
//#define X(tag, str, commutative)
-#define ICEINSTCAST_TABLE \
- /* enum value, printable string */ \
- X(Trunc, "trunc") \
- X(Zext, "zext") \
- X(Sext, "sext") \
- X(Fptrunc, "fptrunc") \
- X(Fpext, "fpext") \
- X(Fptoui, "fptoui") \
- X(Fptosi, "fptosi") \
- X(Uitofp, "uitofp") \
- X(Sitofp, "sitofp") \
+#define ICEINSTCAST_TABLE \
+ /* enum value, printable string */ \
+ X(Trunc, "trunc") \
+ X(Zext, "zext") \
+ X(Sext, "sext") \
+ X(Fptrunc, "fptrunc") \
+ X(Fpext, "fpext") \
+ X(Fptoui, "fptoui") \
+ X(Fptosi, "fptosi") \
+ X(Uitofp, "uitofp") \
+ X(Sitofp, "sitofp") \
X(Bitcast, "bitcast")
//#define X(tag, str)
-#define ICEINSTFCMP_TABLE \
- /* enum value, printable string */ \
- X(False, "false") \
- X(Oeq, "oeq") \
- X(Ogt, "ogt") \
- X(Oge, "oge") \
- X(Olt, "olt") \
- X(Ole, "ole") \
- X(One, "one") \
- X(Ord, "ord") \
- X(Ueq, "ueq") \
- X(Ugt, "ugt") \
- X(Uge, "uge") \
- X(Ult, "ult") \
- X(Ule, "ule") \
- X(Une, "une") \
- X(Uno, "uno") \
+#define ICEINSTFCMP_TABLE \
+ /* enum value, printable string */ \
+ X(False, "false") \
+ X(Oeq, "oeq") \
+ X(Ogt, "ogt") \
+ X(Oge, "oge") \
+ X(Olt, "olt") \
+ X(Ole, "ole") \
+ X(One, "one") \
+ X(Ord, "ord") \
+ X(Ueq, "ueq") \
+ X(Ugt, "ugt") \
+ X(Uge, "uge") \
+ X(Ult, "ult") \
+ X(Ule, "ule") \
+ X(Une, "une") \
+ X(Uno, "uno") \
X(True, "true")
//#define X(tag, str)
-#define ICEINSTICMP_TABLE \
- /* enum value, printable string */ \
- X(Eq, "eq") \
- X(Ne, "ne") \
- X(Ugt, "ugt") \
- X(Uge, "uge") \
- X(Ult, "ult") \
- X(Ule, "ule") \
- X(Sgt, "sgt") \
- X(Sge, "sge") \
- X(Slt, "slt") \
- X(Sle, "sle")
-//#define X(tag, str)
+#define ICEINSTICMP_TABLE \
+ /* enum value, reverse, printable string */ \
+ X(Eq, Eq, "eq") \
+ X(Ne, Ne, "ne") \
+ X(Ugt, Ult, "ugt") \
+ X(Uge, Ule, "uge") \
+ X(Ult, Ugt, "ult") \
+ X(Ule, Uge, "ule") \
+ X(Sgt, Slt, "sgt") \
+ X(Sge, Sle, "sge") \
+ X(Slt, Sgt, "slt") \
+ X(Sle, Sge, "sle")
+//#define X(tag, reverse, str)
#endif // SUBZERO_SRC_ICEINST_DEF
diff --git a/src/IceInst.h b/src/IceInst.h
index 029fcc5..b8e1b61 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -538,7 +538,7 @@
public:
enum ICond {
-#define X(tag, str) tag,
+#define X(tag, inverse, str) tag,
ICEINSTICMP_TABLE
#undef X
_num
@@ -550,6 +550,7 @@
InstIcmp(Func, Condition, Dest, Source1, Source2);
}
ICond getCondition() const { return Condition; }
+ void reverseConditionAndOperands();
bool isMemoryWrite() const override { return false; }
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Instr) { return Instr->getKind() == Icmp; }
@@ -558,7 +559,7 @@
InstIcmp(Cfg *Func, ICond Condition, Variable *Dest, Operand *Source1,
Operand *Source2);
- const ICond Condition;
+ ICond Condition;
};
/// InsertElement instruction.
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index f6c2a78..1b9ca23 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -344,6 +344,7 @@
}
void TargetLowering::doAddressOpt() {
+ doAddressOptOther();
if (llvm::isa<InstLoad>(*Context.getCur()))
doAddressOptLoad();
else if (llvm::isa<InstStore>(*Context.getCur()))
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index dcba5ce..43f5e01 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -396,6 +396,8 @@
virtual void genTargetHelperCallFor(Inst *Instr) = 0;
virtual uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) = 0;
+ /// Opportunity to modify other instructions to help Address Optimization
+ virtual void doAddressOptOther() {}
virtual void doAddressOptLoad() {}
virtual void doAddressOptStore() {}
virtual void doMockBoundsCheck(Operand *) {}
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 237d085..20536d2 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -170,7 +170,8 @@
_num
};
// Define a set of constants based on high-level table entries.
-#define X(tag, str) static constexpr int _icmp_hl_##tag = InstIcmp::tag;
+#define X(tag, reverse, str) \
+ static constexpr int _icmp_hl_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
@@ -183,7 +184,7 @@
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
-#define X(tag, str) \
+#define X(tag, reverse, str) \
static_assert( \
_icmp_hl_##tag == _icmp_ll_##tag, \
"Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 02dd7fa..fd1cf75 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -436,7 +436,7 @@
_num
};
// Define a set of constants based on high-level table entries.
-#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
+#define X(tag, reverse, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
@@ -450,7 +450,7 @@
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
-#define X(tag, str) \
+#define X(tag, reverse, str) \
static_assert( \
_table1_##tag == _table2_##tag, \
"Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 9e6bf74..66a1581 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -775,7 +775,7 @@
_num
};
// Define a set of constants based on high-level table entries.
-#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
+#define X(tag, reverse, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
@@ -789,7 +789,7 @@
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
-#define X(tag, str) \
+#define X(tag, reverse, str) \
static_assert( \
_table1_##tag == _table2_##tag, \
"Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE");
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index b1b2b82..2ff54e9 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -289,6 +289,7 @@
// <Relocatable + Offset>(Base, Index, Shift)
X86OperandMem *computeAddressOpt(const Inst *Instr, Type MemType,
Operand *Addr);
+ void doAddressOptOther() override;
void doAddressOptLoad() override;
void doAddressOptStore() override;
void doMockBoundsCheck(Operand *Opnd) override;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 203ca19..2acd331 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -5658,6 +5658,39 @@
}
template <typename TraitsType>
+void TargetX86Base<TraitsType>::doAddressOptOther() {
+ // Inverts some Icmp instructions which helps doAddressOptLoad later.
+ // TODO(manasijm): Refactor to unify the conditions for Var0 and Var1
+ Inst *Instr = Context.getCur();
+ auto *VMetadata = Func->getVMetadata();
+ if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Instr)) {
+ if (llvm::isa<Constant>(Icmp->getSrc(0)) ||
+ llvm::isa<Constant>(Icmp->getSrc(1)))
+ return;
+ auto *Var0 = llvm::dyn_cast<Variable>(Icmp->getSrc(0));
+ if (Var0 == nullptr)
+ return;
+ if (!VMetadata->isTracked(Var0))
+ return;
+ auto *Op0Def = VMetadata->getFirstDefinitionSingleBlock(Var0);
+ if (Op0Def == nullptr || !llvm::isa<InstLoad>(Op0Def))
+ return;
+ if (VMetadata->getLocalUseNode(Var0) != Context.getNode())
+ return;
+
+ auto *Var1 = llvm::dyn_cast<Variable>(Icmp->getSrc(1));
+ if (Var1 != nullptr && VMetadata->isTracked(Var1)) {
+ auto *Op1Def = VMetadata->getFirstDefinitionSingleBlock(Var1);
+ if (Op1Def != nullptr && !VMetadata->isMultiBlock(Var1) &&
+ llvm::isa<InstLoad>(Op1Def)) {
+ return; // Both are loads
+ }
+ }
+ Icmp->reverseConditionAndOperands();
+ }
+}
+
+template <typename TraitsType>
void TargetX86Base<TraitsType>::doAddressOptLoad() {
Inst *Instr = Context.getCur();
Operand *Addr = Instr->getSrc(0);