[AArch64] Tie source and destination operands for AESMC/AESIMC.
Summary:
Most CPUs implementing AES fusion require instruction pairs of the form
AESE Vn, _
AESMC Vn, Vn
and
AESD Vn, _
AESIMC Vn, Vn
The constraint is added to AES(I)MC instructions that use the result of
an AES(E|D) instruction by using AES(I)MCrrTied pseudo instructions, which
constrain the source and destination registers to be the same.
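For illustration, a minimal IR sketch of the kind of input the new patterns
are intended to match (function and value names are hypothetical, not taken
from the test):

  declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>)
  declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8>)

  define <16 x i8> @aes_round(<16 x i8> %data, <16 x i8> %key) {
    ; aese feeding aesmc: selected as AESErr + AESMCrrTied on +fuse-aes
    %e = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
    %m = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e)
    ret <16 x i8> %m
  }

On subtargets with fuse-aes this should select AESErr followed by
AESMCrrTied; once the tied constraint has forced the operands into the same
register, the pseudo is expanded back to a plain AES(I)MCrr in
AArch64ExpandPseudoInsts (see the change below).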
A nice side effect of this change is that all possible pairs are now
scheduled back-to-back on exynos-m1 in the misched-fusion-aes.ll test
case.
I had to update aes_load_store. The version I added initially was very
reduced and, with the new constraint, AESE/AESMC could not be scheduled
back-to-back. I updated the test to be more realistic while still exposing
the same scheduling problem as the initial test case.
Reviewers: t.p.northover, rengolin, evandro, kristof.beyls, silviu.baranga
Reviewed By: t.p.northover, evandro
Subscribers: aemerson, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D35299
llvm-svn: 309495
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 4543025..fdb90f4 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -966,6 +966,18 @@
case AArch64::CMP_SWAP_128:
return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
+ case AArch64::AESMCrrTied:
+ case AArch64::AESIMCrrTied: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
+ AArch64::AESIMCrr))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1));
+ transferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
}
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0dcf07f..5049a39 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -37,6 +37,9 @@
AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
AssemblerPredicate<"FeatureSPE", "spe">;
+def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
+ AssemblerPredicate<"FeatureFuseAES",
+ "fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
AssemblerPredicate<"FeatureSVE", "sve">;
@@ -5304,6 +5307,31 @@
def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
+// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
+// for AES fusion on some CPUs.
+let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
+def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
+ Sched<[WriteV]>;
+def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
+ Sched<[WriteV]>;
+}
+
+// Only use constrained versions of AES(I)MC instructions if they are paired with
+// AESE/AESD.
+def : Pat<(v16i8 (int_aarch64_crypto_aesmc
+ (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
+ (v16i8 V128:$src2))))),
+ (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
+ (v16i8 V128:$src2)))))>,
+ Requires<[HasFuseAES]>;
+
+def : Pat<(v16i8 (int_aarch64_crypto_aesimc
+ (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
+ (v16i8 V128:$src2))))),
+ (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
+ (v16i8 V128:$src2)))))>,
+ Requires<[HasFuseAES]>;
+
def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>;
def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>;
def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>;
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index ccc9d2a..963cfad 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -118,11 +118,13 @@
// Fuse AES crypto operations.
switch(SecondOpcode) {
// AES encode.
- case AArch64::AESMCrr :
+ case AArch64::AESMCrr:
+ case AArch64::AESMCrrTied:
return FirstOpcode == AArch64::AESErr ||
FirstOpcode == AArch64::INSTRUCTION_LIST_END;
// AES decode.
case AArch64::AESIMCrr:
+ case AArch64::AESIMCrrTied:
return FirstOpcode == AArch64::AESDrr ||
FirstOpcode == AArch64::INSTRUCTION_LIST_END;
}