[clang,ARM] Initial ACLE intrinsics for MVE.
This commit sets up the infrastructure for auto-generating <arm_mve.h>
and doing clang-side code generation for the builtins it relies on,
and demonstrates that it works by implementing a representative sample
of the ACLE intrinsics, more or less matching the ones introduced in
LLVM IR by D67158, D68699 and D68700.
Like NEON, that header file will provide a set of vector types like
uint16x8_t and C functions with names like vaddq_u32(). Unlike NEON,
the ACLE spec for <arm_mve.h> includes a polymorphism system, so that
you can write plain vaddq() and disambiguate by the vector types you
pass to it.
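
For illustration, here is roughly how the two spellings are used (this
assumes a target with MVE enabled, e.g. -march=armv8.1-m.main+mve, and the
generated <arm_mve.h> on the include path):

    #include <arm_mve.h>

    uint32x4_t add_explicit(uint32x4_t a, uint32x4_t b) {
        return vaddq_u32(a, b);   /* fully typed name */
    }

    uint32x4_t add_poly(uint32x4_t a, uint32x4_t b) {
        return vaddq(a, b);       /* resolved to vaddq_u32 from the argument types */
    }

    int16x8_t add_poly_s16(int16x8_t a, int16x8_t b) {
        return vaddq(a, b);       /* same spelling, resolved to vaddq_s16 */
    }
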
Unlike the corresponding NEON code, I've arranged to make every user-
facing ACLE intrinsic into a clang builtin, and to implement all the code
generation inside clang. So <arm_mve.h> itself contains nothing but
typedefs and function declarations, with the latter all using the new
`__attribute__((__clang_arm_mve_alias))` system to arrange that the user-
facing function names correspond to the right internal BuiltinIDs.
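
The declarations in the header therefore look roughly like the sketch below.
(The exact text is generated by MveEmitter; the attribute and builtin
spellings here are only meant to show the shape.)

    /* Sketch only: the real declarations are emitted by MveEmitter. */
    static __inline__
    __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_u32)))
    uint32x4_t vaddq_u32(uint32x4_t, uint32x4_t);

    /* The polymorphic spelling is just another overload aliasing a builtin,
       with one such declaration per type the intrinsic accepts. */
    static __inline__
    __attribute__((overloadable,
                   __clang_arm_mve_alias(__builtin_arm_mve_vaddq_u32)))
    uint32x4_t vaddq(uint32x4_t, uint32x4_t);
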
So the new MveEmitter Tablegen backend specifies the full sequence of
IRBuilder operations that each user-facing ACLE intrinsic should
translate into. Where possible, the ACLE intrinsics map to standard IR
operations such as vector-typed `add` and `fadd`; where no standard
representation exists, I call down to the sample IR intrinsics
introduced in an earlier commit.
Doing it like this means that you get the polymorphism for free just
by using __attribute__((overloadable)): clang's overload resolution
decides which function declaration is the relevant one, and _then_ its
BuiltinID is looked up, so by the time we're doing code generation,
that's all been resolved by the standard system. It also means that
you get really nice error messages if the user passes the wrong
combination of types: clang will show the declarations from the header
file and explain why each one doesn't match.
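
For example, a call that mixes vector types finds no matching overload, so
clang rejects it and points at the candidate declarations from the header
(sketch only):

    #include <arm_mve.h>

    uint32x4_t mismatched(uint32x4_t a, int16x8_t b) {
        /* error: no overload of vaddq accepts (uint32x4_t, int16x8_t);
           the accompanying notes list the vaddq declarations in <arm_mve.h> */
        return vaddq(a, b);
    }
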
(The obvious alternative approach would be to have wrapper functions
in <arm_mve.h> which pass their arguments to the underlying builtins.
But that doesn't work in the case where one of the arguments has to be
a constant integer: the wrapper function can't pass the constantness
through. So you'd have to do that case using a macro instead, and then
use C11 `_Generic` to handle the polymorphism. Then you have to add
horrible workarounds because `_Generic` requires even the untaken
branches to type-check successfully, and //then// if the user gets the
types wrong, the error message is totally unreadable!)
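
(For reference, the rejected approach would have been shaped roughly like the
macro below, with the builtin names standing in for whatever the real ones
would be. The untaken association still has to type-check, which is exactly
where the workarounds and the unreadable errors come from.)

    /* Hypothetical sketch of the approach NOT taken in this patch. */
    #define vaddq(a, b)                                               \
        _Generic((a),                                                 \
                 uint32x4_t: __builtin_arm_mve_vaddq_u32((a), (b)),   \
                 int16x8_t:  __builtin_arm_mve_vaddq_s16((a), (b)))
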
Reviewers: dmgreen, miyuki, ostannard
Subscribers: mgorny, javed.absar, kristof.beyls, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D67161
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f9871b2..c35d0a8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4218,7 +4218,7 @@
}
// See if we have a target specific builtin that needs to be lowered.
- if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
+ if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue))
return RValue::get(V);
ErrorUnsupported(E, "builtin function");
@@ -4229,13 +4229,14 @@
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
unsigned BuiltinID, const CallExpr *E,
+ ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch) {
switch (Arch) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
- return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
+ return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
@@ -4268,15 +4269,16 @@
}
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+ const CallExpr *E,
+ ReturnValueSlot ReturnValue) {
if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
assert(getContext().getAuxTargetInfo() && "Missing aux target info");
return EmitTargetArchBuiltinExpr(
this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
- getContext().getAuxTargetInfo()->getTriple().getArch());
+ ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
}
- return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
+ return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
getTarget().getTriple().getArch());
}
@@ -6004,6 +6006,7 @@
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
+ ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch) {
if (auto Hint = GetValueForARMHint(BuiltinID))
return Hint;
@@ -6320,6 +6323,10 @@
return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
}
+ // Deal with MVE builtins
+ if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
+ return Result;
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -6769,6 +6776,109 @@
}
}
+Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E,
+ ReturnValueSlot ReturnValue,
+ llvm::Triple::ArchType Arch) {
+ enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
+ Intrinsic::ID IRIntr;
+ unsigned NumVectors;
+
+ // Code autogenerated by Tablegen will handle all the simple builtins.
+ switch (BuiltinID) {
+ #include "clang/Basic/arm_mve_builtin_cg.inc"
+
+ // If we didn't match an MVE builtin id at all, go back to the
+ // main EmitARMBuiltinExpr.
+ default:
+ return nullptr;
+ }
+
+ // Anything that breaks from that switch is an MVE builtin that
+ // needs handwritten code to generate.
+
+ switch (CustomCodeGenType) {
+
+ case CustomCodeGen::VLD24: {
+ llvm::SmallVector<Value *, 4> Ops;
+ llvm::SmallVector<llvm::Type *, 4> Tys;
+
+ auto MvecCType = E->getType();
+ auto MvecLType = ConvertType(MvecCType);
+ assert(MvecLType->isStructTy() &&
+ "Return type for vld[24]q should be a struct");
+ assert(MvecLType->getStructNumElements() == 1 &&
+ "Return-type struct for vld[24]q should have one element");
+ auto MvecLTypeInner = MvecLType->getStructElementType(0);
+ assert(MvecLTypeInner->isArrayTy() &&
+ "Return-type struct for vld[24]q should contain an array");
+ assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
+ "Array member of return-type struct vld[24]q has wrong length");
+ auto VecLType = MvecLTypeInner->getArrayElementType();
+
+ Tys.push_back(VecLType);
+
+ auto Addr = E->getArg(0);
+ Ops.push_back(EmitScalarExpr(Addr));
+ Tys.push_back(ConvertType(Addr->getType()));
+
+ Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
+ Value *LoadResult = Builder.CreateCall(F, Ops);
+ Value *MvecOut = UndefValue::get(MvecLType);
+ for (unsigned i = 0; i < NumVectors; ++i) {
+ Value *Vec = Builder.CreateExtractValue(LoadResult, i);
+ MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
+ }
+
+ if (ReturnValue.isNull())
+ return MvecOut;
+ else
+ return Builder.CreateStore(MvecOut, ReturnValue.getValue());
+ }
+
+ case CustomCodeGen::VST24: {
+ llvm::SmallVector<Value *, 4> Ops;
+ llvm::SmallVector<llvm::Type *, 4> Tys;
+
+ auto Addr = E->getArg(0);
+ Ops.push_back(EmitScalarExpr(Addr));
+ Tys.push_back(ConvertType(Addr->getType()));
+
+ auto MvecCType = E->getArg(1)->getType();
+ auto MvecLType = ConvertType(MvecCType);
+ assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
+ assert(MvecLType->getStructNumElements() == 1 &&
+ "Data-type struct for vst2q should have one element");
+ auto MvecLTypeInner = MvecLType->getStructElementType(0);
+ assert(MvecLTypeInner->isArrayTy() &&
+ "Data-type struct for vst2q should contain an array");
+ assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
+ "Array member of return-type struct vld[24]q has wrong length");
+ auto VecLType = MvecLTypeInner->getArrayElementType();
+
+ Tys.push_back(VecLType);
+
+ AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
+ EmitAggExpr(E->getArg(1), MvecSlot);
+ auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
+ for (unsigned i = 0; i < NumVectors; i++)
+ Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
+
+ Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
+ Value *ToReturn = nullptr;
+ for (unsigned i = 0; i < NumVectors; i++) {
+ Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
+ ToReturn = Builder.CreateCall(F, Ops);
+ Ops.pop_back();
+ }
+ return ToReturn;
+ }
+
+ default:
+ llvm_unreachable("bad CustomCodegen enum value");
+ }
+}
+
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
const CallExpr *E,
SmallVectorImpl<Value *> &Ops,
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 99bc85b..2c20ba4 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3726,14 +3726,19 @@
/// EmitTargetBuiltinExpr - Emit the given builtin call. Returns 0 if the call
/// is unhandled by the current target.
- llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+ ReturnValueSlot ReturnValue);
llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
const llvm::CmpInst::Predicate Fp,
const llvm::CmpInst::Predicate Ip,
const llvm::Twine &Name = "");
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+ ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch);
+ llvm::Value *EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+ ReturnValueSlot ReturnValue,
+ llvm::Triple::ArchType Arch);
llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
unsigned LLVMIntrinsic,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 76ea66a..8ff648f 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -182,6 +182,8 @@
clang_generate_header(-gen-arm-neon arm_neon.td arm_neon.h)
# Generate arm_fp16.h
clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h)
+# Generate arm_mve.h
+clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
add_custom_target(clang-resource-headers ALL DEPENDS ${out_files})
set_target_properties(clang-resource-headers PROPERTIES
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index dca81d1..8322a9b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1717,6 +1717,14 @@
return SemaBuiltinConstantArgRange(TheCall, i, l, u + l);
}
+bool Sema::CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+ switch (BuiltinID) {
+ default:
+ return false;
+ #include "clang/Basic/arm_mve_builtin_sema.inc"
+ }
+}
+
bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
unsigned MaxWidth) {
assert((BuiltinID == ARM::BI__builtin_arm_ldrex ||
@@ -1857,6 +1865,8 @@
if (CheckNeonBuiltinFunctionCall(BuiltinID, TheCall))
return true;
+ if (CheckMVEBuiltinFunctionCall(BuiltinID, TheCall))
+ return true;
// For intrinsics which take an immediate value as part of the instruction,
// range check them here.
@@ -6235,6 +6245,101 @@
return false;
}
+/// SemaBuiltinConstantArgPower2 - Check if argument ArgNum of TheCall is a
+/// constant expression representing a power of 2.
+bool Sema::SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum) {
+ llvm::APSInt Result;
+
+ // We can't check the value of a dependent argument.
+ Expr *Arg = TheCall->getArg(ArgNum);
+ if (Arg->isTypeDependent() || Arg->isValueDependent())
+ return false;
+
+ // Check constant-ness first.
+ if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
+ return true;
+
+ // Bit-twiddling to test for a power of 2: for x > 0, x & (x-1) is zero if
+ // and only if x is a power of 2.
+ if (Result.isStrictlyPositive() && (Result & (Result - 1)) == 0)
+ return false;
+
+ return Diag(TheCall->getBeginLoc(), diag::err_argument_not_power_of_2)
+ << Arg->getSourceRange();
+}
+
+static bool IsShiftedByte(llvm::APSInt Value) {
+ if (Value.isNegative())
+ return false;
+
+ // Check if it's a shifted byte, by shifting it down
+ while (true) {
+ // If the value fits in the bottom byte, the check passes.
+ if (Value < 0x100)
+ return true;
+
+ // Otherwise, if the value has _any_ bits in the bottom byte, the check
+ // fails.
+ if ((Value & 0xFF) != 0)
+ return false;
+
+ // If the bottom 8 bits are all 0, but something above that is nonzero,
+ // then shifting the value right by 8 bits won't affect whether it's a
+ // shifted byte or not. So do that, and go round again.
+ Value >>= 8;
+ }
+}
+
+/// SemaBuiltinConstantArgShiftedByte - Check if argument ArgNum of TheCall is
+/// a constant expression representing an arbitrary byte value shifted left by
+/// a multiple of 8 bits.
+bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum) {
+ llvm::APSInt Result;
+
+ // We can't check the value of a dependent argument.
+ Expr *Arg = TheCall->getArg(ArgNum);
+ if (Arg->isTypeDependent() || Arg->isValueDependent())
+ return false;
+
+ // Check constant-ness first.
+ if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
+ return true;
+
+ if (IsShiftedByte(Result))
+ return false;
+
+ return Diag(TheCall->getBeginLoc(), diag::err_argument_not_shifted_byte)
+ << Arg->getSourceRange();
+}
+
+/// SemaBuiltinConstantArgShiftedByteOrXXFF - Check if argument ArgNum of
+/// TheCall is a constant expression representing either a shifted byte value,
+/// or a value of the form 0x??FF (i.e. a member of the arithmetic progression
+/// 0x00FF, 0x01FF, ..., 0xFFFF). This strange range check is needed for some
+/// Arm MVE intrinsics.
+bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall,
+ int ArgNum) {
+ llvm::APSInt Result;
+
+ // We can't check the value of a dependent argument.
+ Expr *Arg = TheCall->getArg(ArgNum);
+ if (Arg->isTypeDependent() || Arg->isValueDependent())
+ return false;
+
+ // Check constant-ness first.
+ if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
+ return true;
+
+ // Check to see if it's in either of the required forms.
+ if (IsShiftedByte(Result) ||
+ (Result > 0 && Result < 0x10000 && (Result & 0xFF) == 0xFF))
+ return false;
+
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_argument_not_shifted_byte_or_xxff)
+ << Arg->getSourceRange();
+}
+
/// SemaBuiltinARMMemoryTaggingCall - Handle calls of memory tagging extensions
bool Sema::SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall) {
if (BuiltinID == AArch64::BI__builtin_arm_irg) {
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index cebb1c6..abbd597 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4832,8 +4832,13 @@
}
static bool ArmMveAliasValid(unsigned BuiltinID, StringRef AliasName) {
- // FIXME: this will be filled in by Tablegen which isn't written yet
- return false;
+ if (AliasName.startswith("__arm_"))
+ AliasName = AliasName.substr(6);
+ switch (BuiltinID) {
+#include "clang/Basic/arm_mve_builtin_aliases.inc"
+ default:
+ return false;
+ }
}
static void handleArmMveAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index fccdb2b..02fc811 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -7261,8 +7261,10 @@
/// match one of the standard Neon vector types.
static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S, VectorType::VectorKind VecKind) {
- // Target must have NEON
- if (!S.Context.getTargetInfo().hasFeature("neon")) {
+ // Target must have NEON (or MVE, whose vectors are similar enough
+ // not to need a separate attribute)
+ if (!S.Context.getTargetInfo().hasFeature("neon") &&
+ !S.Context.getTargetInfo().hasFeature("mve")) {
S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) << Attr;
Attr.setInvalid();
return;