[ARM] Armv8.6-a Matrix Mul cmd line support
This patch upstreams support for the Armv8.6-a Matrix Multiplication
Extension. A summary of the features can be found here:
https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a
This patch includes:
- Command line options to enable these features with +i8mm, +f32mm, or +f64mm
Note: +f32mm and +f64mm are optional and so are not enabled by default
This is part of a patch series that started with BFloat16 support and
the other components of the Armv8.6-a extension (see the previous patches
linked in Phabricator).
Based on work by:
- Luke Geeson
- Oliver Stannard
- Luke Cheeseman
Reviewers: t.p.northover, DavidSpickett
Reviewed By: DavidSpickett
Subscribers: DavidSpickett, ostannard, kristof.beyls, danielkiss,
cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D77875
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index b21cfac..4c034d4 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -54,7 +54,8 @@
// Decode AArch64 features from string like +[no]featureA+[no]featureB+...
static bool DecodeAArch64Features(const Driver &D, StringRef text,
- std::vector<StringRef> &Features) {
+ std::vector<StringRef> &Features,
+ llvm::AArch64::ArchKind ArchKind) {
SmallVector<StringRef, 8> Split;
text.split(Split, StringRef("+"), -1, false);
@@ -66,6 +67,11 @@
D.Diag(clang::diag::err_drv_no_neon_modifier);
else
return false;
+
+ // +sve implies +f32mm if the base architecture is v8.6A
+ // it isn't the case in general that sve implies both f64mm and f32mm
+ if ((ArchKind == llvm::AArch64::ArchKind::ARMV8_6A) && Feature == "sve")
+ Features.push_back("+f32mm");
}
return true;
}
@@ -76,6 +82,7 @@
std::vector<StringRef> &Features) {
std::pair<StringRef, StringRef> Split = Mcpu.split("+");
CPU = Split.first;
+ llvm::AArch64::ArchKind ArchKind = llvm::AArch64::ArchKind::ARMV8A;
if (CPU == "native")
CPU = llvm::sys::getHostCPUName();
@@ -83,7 +90,7 @@
if (CPU == "generic") {
Features.push_back("+neon");
} else {
- llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseCPUArch(CPU);
+ ArchKind = llvm::AArch64::parseCPUArch(CPU);
if (!llvm::AArch64::getArchFeatures(ArchKind, Features))
return false;
@@ -92,10 +99,11 @@
return false;
}
- if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features))
- return false;
+ if (Split.second.size() &&
+ !DecodeAArch64Features(D, Split.second, Features, ArchKind))
+ return false;
- return true;
+ return true;
}
static bool
@@ -108,7 +116,8 @@
llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseArch(Split.first);
if (ArchKind == llvm::AArch64::ArchKind::INVALID ||
!llvm::AArch64::getArchFeatures(ArchKind, Features) ||
- (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)))
+ (Split.second.size() &&
+ !DecodeAArch64Features(D, Split.second, Features, ArchKind)))
return false;
return true;