X86-FMA3: Implemented commute transformation for EVEX/AVX512 FMA3 opcodes.
This helped to improve memory-folding and register-coalescing optimizations.
Also, this patch fixed tracker issue #17229.
Reviewer: Craig Topper.
Differential Revision: https://reviews.llvm.org/D23108
llvm-svn: 278431
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 7251aec..5c8de0f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_X86_X86INSTRINFO_H
#include "MCTargetDesc/X86BaseInfo.h"
+#include "X86InstrFMA3Info.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -265,7 +266,7 @@
unsigned &SrcOpIdx2) const override;
/// Returns true if the routine could find two commutable operands
- /// in the given FMA instruction. Otherwise, returns false.
+ /// in the given FMA instruction \p MI. Otherwise, returns false.
///
/// \p SrcOpIdx1 and \p SrcOpIdx2 are INPUT and OUTPUT arguments.
/// The output indices of the commuted operands are returned in these
@@ -274,10 +275,12 @@
/// value 'CommuteAnyOperandIndex' which means that the corresponding
/// operand index is not set and this method is free to pick any of
/// available commutable operands.
+ /// The parameter \p FMA3Group keeps the reference to the group of related
+ /// FMA3 opcodes including register/memory forms of 132/213/231 opcodes.
///
/// For example, calling this method this way:
/// unsigned Idx1 = 1, Idx2 = CommuteAnyOperandIndex;
- /// findFMA3CommutedOpIndices(MI, Idx1, Idx2);
+ /// findFMA3CommutedOpIndices(MI, Idx1, Idx2, FMA3Group);
/// can be interpreted as a query asking if the operand #1 can be swapped
/// with any other available operand (e.g. operand #2, operand #3, etc.).
///
@@ -286,9 +289,30 @@
/// FMA213 #1, #2, #3
/// results into instruction with adjusted opcode:
/// FMA231 #3, #2, #1
- bool findFMA3CommutedOpIndices(MachineInstr &MI, bool IsIntrinOpcode,
+ bool findFMA3CommutedOpIndices(const MachineInstr &MI,
unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const;
+ unsigned &SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const;
+
+ /// Returns an adjusted FMA opcode that must be used in FMA instruction that
+ /// performs the same computations as the given \p MI but which has the
+ /// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
+ /// It may return 0 if it is unsafe to commute the operands.
+ /// Note that a machine instruction (instead of its opcode) is passed as the
+ /// first parameter to make it possible to analyze the instruction's uses and
+ /// commute the first operand of FMA even when it seems unsafe when you look
+ /// at the opcode. For example, it is Ok to commute the first operand of
+ /// VFMADD*SD_Int, if ONLY the lowest 64-bit element of the result is used.
+ ///
+ /// The returned FMA opcode may differ from the opcode in the given \p MI.
+ /// For example, commuting the operands #1 and #3 in the following FMA
+ /// FMA213 #1, #2, #3
+ /// results into instruction with adjusted opcode:
+ /// FMA231 #3, #2, #1
+ unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI,
+ unsigned SrcOpIdx1,
+ unsigned SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const;
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr &MI) const override;