Updated to Clang 3.5a.
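
The hunks below track LLVM/Clang 3.5-era API migrations that recur throughout
this patch: llvm::tie -> std::tie, OwningPtr -> std::unique_ptr,
llvm/Support/CallSite.h -> llvm/IR/CallSite.h, and atomic cmpxchg emission now
taking separate success and failure orderings. As a minimal sketch of that last
change (illustration only, not part of the upstream diff; the helper name is
made up):

    #include "llvm/IR/IRBuilder.h"

    // With the 3.5-era IRBuilder, CreateAtomicCmpXchg takes both a success and
    // a failure ordering, which is why EmitAtomicOp in CGAtomic.cpp grows a
    // FailureOrder parameter in this patch.
    static llvm::Value *emitCmpXchgSketch(llvm::IRBuilder<> &B, llvm::Value *Ptr,
                                          llvm::Value *Expected,
                                          llvm::Value *Desired) {
      llvm::AtomicCmpXchgInst *Old = B.CreateAtomicCmpXchg(
          Ptr, Expected, Desired,
          /*SuccessOrdering=*/llvm::SequentiallyConsistent,
          /*FailureOrdering=*/llvm::Monotonic);
      // At this snapshot the instruction still yields the old value; compare it
      // against the expected value to learn whether the exchange happened.
      return B.CreateICmpEQ(Old, Expected);
    }
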
Change-Id: I8127eb568f674c2e72635b639a3295381fe8af82
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index 468fe04..d3ec46c 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -11,8 +11,8 @@
#define CLANG_CODEGEN_ABIINFO_H
#include "clang/AST/Type.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Type.h"
namespace llvm {
class Value;
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 9108e24..1c4a0b7 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -3,6 +3,7 @@
clang_codegen_TBLGEN_TABLES := \
AttrList.inc \
Attrs.inc \
+ AttrVisitor.inc \
CommentCommandList.inc \
CommentNodes.inc \
DeclNodes.inc \
@@ -48,11 +49,11 @@
CodeGenAction.cpp \
CodeGenFunction.cpp \
CodeGenModule.cpp \
+ CodeGenPGO.cpp \
CodeGenTBAA.cpp \
CodeGenTypes.cpp \
ItaniumCXXABI.cpp \
MicrosoftCXXABI.cpp \
- MicrosoftVBTables.cpp \
ModuleBuilder.cpp \
TargetInfo.cpp
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 90b0f68..17f5f4d 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -13,13 +13,14 @@
#include "clang/Basic/TargetOptions.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/FrontendDiagnostic.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "clang/Frontend/Utils.h"
+#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
@@ -36,6 +37,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
+#include <memory>
using namespace clang;
using namespace llvm;
@@ -55,38 +57,37 @@
mutable FunctionPassManager *PerFunctionPasses;
private:
- PassManager *getCodeGenPasses(TargetMachine *TM) const {
+ PassManager *getCodeGenPasses() const {
if (!CodeGenPasses) {
CodeGenPasses = new PassManager();
- CodeGenPasses->add(new DataLayout(TheModule));
+ CodeGenPasses->add(new DataLayoutPass(TheModule));
if (TM)
TM->addAnalysisPasses(*CodeGenPasses);
}
return CodeGenPasses;
}
- PassManager *getPerModulePasses(TargetMachine *TM) const {
+ PassManager *getPerModulePasses() const {
if (!PerModulePasses) {
PerModulePasses = new PassManager();
- PerModulePasses->add(new DataLayout(TheModule));
+ PerModulePasses->add(new DataLayoutPass(TheModule));
if (TM)
TM->addAnalysisPasses(*PerModulePasses);
}
return PerModulePasses;
}
- FunctionPassManager *getPerFunctionPasses(TargetMachine *TM) const {
+ FunctionPassManager *getPerFunctionPasses() const {
if (!PerFunctionPasses) {
PerFunctionPasses = new FunctionPassManager(TheModule);
- PerFunctionPasses->add(new DataLayout(TheModule));
+ PerFunctionPasses->add(new DataLayoutPass(TheModule));
if (TM)
TM->addAnalysisPasses(*PerFunctionPasses);
}
return PerFunctionPasses;
}
-
- void CreatePasses(TargetMachine *TM);
+ void CreatePasses();
/// CreateTargetMachine - Generates the TargetMachine.
/// Returns Null if it is unable to create the target machine.
@@ -101,8 +102,7 @@
/// AddEmitPasses - Add passes necessary to emit assembly or LLVM IR.
///
/// \return True on success.
- bool AddEmitPasses(BackendAction Action, formatted_raw_ostream &OS,
- TargetMachine *TM);
+ bool AddEmitPasses(BackendAction Action, formatted_raw_ostream &OS);
public:
EmitAssemblyHelper(DiagnosticsEngine &_Diags,
@@ -118,8 +118,12 @@
delete CodeGenPasses;
delete PerModulePasses;
delete PerFunctionPasses;
+ if (CodeGenOpts.DisableFree)
+ BuryPointer(TM.release());
}
+ std::unique_ptr<TargetMachine> TM;
+
void EmitAssembly(BackendAction Action, raw_ostream *OS);
};
@@ -162,6 +166,11 @@
PM.add(createSampleProfileLoaderPass(CGOpts.SampleProfileFile));
}
+static void addAddDiscriminatorsPass(const PassManagerBuilder &Builder,
+ PassManagerBase &PM) {
+ PM.add(createAddDiscriminatorsPass());
+}
+
static void addBoundsCheckingPass(const PassManagerBuilder &Builder,
PassManagerBase &PM) {
PM.add(createBoundsCheckingPass());
@@ -177,12 +186,10 @@
LangOpts.Sanitize.InitOrder,
LangOpts.Sanitize.UseAfterReturn,
LangOpts.Sanitize.UseAfterScope,
- CGOpts.SanitizerBlacklistFile,
- CGOpts.SanitizeAddressZeroBaseShadow));
+ CGOpts.SanitizerBlacklistFile));
PM.add(createAddressSanitizerModulePass(
LangOpts.Sanitize.InitOrder,
- CGOpts.SanitizerBlacklistFile,
- CGOpts.SanitizeAddressZeroBaseShadow));
+ CGOpts.SanitizerBlacklistFile));
}
static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
@@ -222,7 +229,7 @@
PM.add(createDataFlowSanitizerPass(CGOpts.SanitizerBlacklistFile));
}
-void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) {
+void EmitAssemblyHelper::CreatePasses() {
unsigned OptLevel = CodeGenOpts.OptimizationLevel;
CodeGenOptions::InliningMethod Inlining = CodeGenOpts.getInlining();
@@ -244,6 +251,9 @@
PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
PMBuilder.RerollLoops = CodeGenOpts.RerollLoops;
+ PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
+ addAddDiscriminatorsPass);
+
if (!CodeGenOpts.SampleProfileFile.empty())
PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
addSampleProfileLoaderPass);
@@ -298,19 +308,12 @@
PMBuilder.LibraryInfo = new TargetLibraryInfo(TargetTriple);
if (!CodeGenOpts.SimplifyLibCalls)
PMBuilder.LibraryInfo->disableAllFunctions();
-
+
switch (Inlining) {
case CodeGenOptions::NoInlining: break;
case CodeGenOptions::NormalInlining: {
- // FIXME: Derive these constants in a principled fashion.
- unsigned Threshold = 225;
- if (CodeGenOpts.OptimizeSize == 1) // -Os
- Threshold = 75;
- else if (CodeGenOpts.OptimizeSize == 2) // -Oz
- Threshold = 25;
- else if (OptLevel > 2)
- Threshold = 275;
- PMBuilder.Inliner = createFunctionInliningPass(Threshold);
+ PMBuilder.Inliner =
+ createFunctionInliningPass(OptLevel, CodeGenOpts.OptimizeSize);
break;
}
case CodeGenOptions::OnlyAlwaysInlining:
@@ -324,13 +327,13 @@
}
// Set up the per-function pass manager.
- FunctionPassManager *FPM = getPerFunctionPasses(TM);
+ FunctionPassManager *FPM = getPerFunctionPasses();
if (CodeGenOpts.VerifyModule)
FPM->add(createVerifierPass());
PMBuilder.populateFunctionPassManager(*FPM);
// Set up the per-module pass manager.
- PassManager *MPM = getPerModulePasses(TM);
+ PassManager *MPM = getPerModulePasses();
if (!CodeGenOpts.DisableGCov &&
(CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) {
@@ -437,6 +440,12 @@
llvm::TargetOptions Options;
+ if (CodeGenOpts.DisableIntegratedAS)
+ Options.DisableIntegratedAS = true;
+
+ if (CodeGenOpts.CompressDebugSections)
+ Options.CompressDebugSections = true;
+
// Set frame pointer elimination mode.
if (!CodeGenOpts.DisableFPElim) {
Options.NoFramePointerElim = false;
@@ -503,11 +512,10 @@
}
bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action,
- formatted_raw_ostream &OS,
- TargetMachine *TM) {
+ formatted_raw_ostream &OS) {
// Create the code generator passes.
- PassManager *PM = getCodeGenPasses(TM);
+ PassManager *PM = getCodeGenPasses();
// Add LibraryInfo.
llvm::Triple TargetTriple(TheModule->getTargetTriple());
@@ -552,27 +560,28 @@
bool UsesCodeGen = (Action != Backend_EmitNothing &&
Action != Backend_EmitBC &&
Action != Backend_EmitLL);
- TargetMachine *TM = CreateTargetMachine(UsesCodeGen);
+ if (!TM)
+ TM.reset(CreateTargetMachine(UsesCodeGen));
+
if (UsesCodeGen && !TM) return;
- llvm::OwningPtr<TargetMachine> TMOwner(CodeGenOpts.DisableFree ? 0 : TM);
- CreatePasses(TM);
+ CreatePasses();
switch (Action) {
case Backend_EmitNothing:
break;
case Backend_EmitBC:
- getPerModulePasses(TM)->add(createBitcodeWriterPass(*OS));
+ getPerModulePasses()->add(createBitcodeWriterPass(*OS));
break;
case Backend_EmitLL:
FormattedOS.setStream(*OS, formatted_raw_ostream::PRESERVE_STREAM);
- getPerModulePasses(TM)->add(createPrintModulePass(&FormattedOS));
+ getPerModulePasses()->add(createPrintModulePass(FormattedOS));
break;
default:
FormattedOS.setStream(*OS, formatted_raw_ostream::PRESERVE_STREAM);
- if (!AddEmitPasses(Action, FormattedOS, TM))
+ if (!AddEmitPasses(Action, FormattedOS))
return;
}
@@ -607,10 +616,23 @@
void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
const CodeGenOptions &CGOpts,
const clang::TargetOptions &TOpts,
- const LangOptions &LOpts,
- Module *M,
- BackendAction Action, raw_ostream *OS) {
+ const LangOptions &LOpts, StringRef TDesc,
+ Module *M, BackendAction Action,
+ raw_ostream *OS) {
EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M);
AsmHelper.EmitAssembly(Action, OS);
+
+ // If an optional clang TargetInfo description string was passed in, use it to
+ // verify the LLVM TargetMachine's DataLayout.
+ if (AsmHelper.TM && !TDesc.empty()) {
+ std::string DLDesc =
+ AsmHelper.TM->getDataLayout()->getStringRepresentation();
+ if (DLDesc != TDesc) {
+ unsigned DiagID = Diags.getCustomDiagID(
+ DiagnosticsEngine::Error, "backend data layout '%0' does not match "
+ "expected target description '%1'");
+ Diags.Report(DiagID) << DLDesc << TDesc;
+ }
+ }
}
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 0df2a40..7c7c80c 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -24,16 +24,6 @@
using namespace clang;
using namespace CodeGen;
-// The ABI values for various atomic memory orderings.
-enum AtomicOrderingKind {
- AO_ABI_memory_order_relaxed = 0,
- AO_ABI_memory_order_consume = 1,
- AO_ABI_memory_order_acquire = 2,
- AO_ABI_memory_order_release = 3,
- AO_ABI_memory_order_acq_rel = 4,
- AO_ABI_memory_order_seq_cst = 5
-};
-
namespace {
class AtomicInfo {
CodeGenFunction &CGF;
@@ -57,10 +47,10 @@
ASTContext &C = CGF.getContext();
uint64_t valueAlignInBits;
- llvm::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy);
+ std::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy);
uint64_t atomicAlignInBits;
- llvm::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy);
+ std::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy);
assert(ValueSizeInBits <= AtomicSizeInBits);
assert(valueAlignInBits <= atomicAlignInBits);
@@ -184,10 +174,134 @@
return true;
}
-static void
-EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
- llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
- uint64_t Size, unsigned Align, llvm::AtomicOrdering Order) {
+static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E,
+ llvm::Value *Dest, llvm::Value *Ptr,
+ llvm::Value *Val1, llvm::Value *Val2,
+ uint64_t Size, unsigned Align,
+ llvm::AtomicOrdering SuccessOrder,
+ llvm::AtomicOrdering FailureOrder) {
+ // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
+ llvm::LoadInst *Expected = CGF.Builder.CreateLoad(Val1);
+ Expected->setAlignment(Align);
+ llvm::LoadInst *Desired = CGF.Builder.CreateLoad(Val2);
+ Desired->setAlignment(Align);
+
+ llvm::AtomicCmpXchgInst *Old = CGF.Builder.CreateAtomicCmpXchg(
+ Ptr, Expected, Desired, SuccessOrder, FailureOrder);
+ Old->setVolatile(E->isVolatile());
+
+ // Cmp holds the result of the compare-exchange operation: true on success,
+ // false on failure.
+ llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(Old, Expected);
+
+ // This basic block is used to hold the store instruction if the operation
+ // failed.
+ llvm::BasicBlock *StoreExpectedBB =
+ CGF.createBasicBlock("cmpxchg.store_expected", CGF.CurFn);
+
+ // This basic block is the exit point of the operation, we should end up
+ // here regardless of whether or not the operation succeeded.
+ llvm::BasicBlock *ContinueBB =
+ CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);
+
+ // Update Expected if Expected isn't equal to Old, otherwise branch to the
+ // exit point.
+ CGF.Builder.CreateCondBr(Cmp, ContinueBB, StoreExpectedBB);
+
+ CGF.Builder.SetInsertPoint(StoreExpectedBB);
+ // Update the memory at Expected with Old's value.
+ llvm::StoreInst *StoreExpected = CGF.Builder.CreateStore(Old, Val1);
+ StoreExpected->setAlignment(Align);
+ // Finally, branch to the exit point.
+ CGF.Builder.CreateBr(ContinueBB);
+
+ CGF.Builder.SetInsertPoint(ContinueBB);
+ // Update the memory at Dest with Cmp's value.
+ CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
+ return;
+}
+
+/// Given an ordering required on success, emit all possible cmpxchg
+/// instructions to cope with the provided (but possibly only dynamically known)
+/// FailureOrder.
+static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
+ llvm::Value *Dest, llvm::Value *Ptr,
+ llvm::Value *Val1, llvm::Value *Val2,
+ llvm::Value *FailureOrderVal,
+ uint64_t Size, unsigned Align,
+ llvm::AtomicOrdering SuccessOrder) {
+ llvm::AtomicOrdering FailureOrder;
+ if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
+ switch (FO->getSExtValue()) {
+ default:
+ FailureOrder = llvm::Monotonic;
+ break;
+ case AtomicExpr::AO_ABI_memory_order_consume:
+ case AtomicExpr::AO_ABI_memory_order_acquire:
+ FailureOrder = llvm::Acquire;
+ break;
+ case AtomicExpr::AO_ABI_memory_order_seq_cst:
+ FailureOrder = llvm::SequentiallyConsistent;
+ break;
+ }
+ if (FailureOrder >= SuccessOrder) {
+ // Don't assert on undefined behaviour.
+ FailureOrder =
+ llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder);
+ }
+ emitAtomicCmpXchg(CGF, E, Dest, Ptr, Val1, Val2, Size, Align, SuccessOrder,
+ FailureOrder);
+ return;
+ }
+
+ // Create all the relevant BB's
+ llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *SeqCstBB = 0;
+ MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn);
+ if (SuccessOrder != llvm::Monotonic && SuccessOrder != llvm::Release)
+ AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn);
+ if (SuccessOrder == llvm::SequentiallyConsistent)
+ SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn);
+
+ llvm::BasicBlock *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn);
+
+ llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(FailureOrderVal, MonotonicBB);
+
+ // Emit all the different atomics
+
+ // MonotonicBB is arbitrarily chosen as the default case; in practice, this
+ // doesn't matter unless someone is crazy enough to use something that
+ // doesn't fold to a constant for the ordering.
+ CGF.Builder.SetInsertPoint(MonotonicBB);
+ emitAtomicCmpXchg(CGF, E, Dest, Ptr, Val1, Val2,
+ Size, Align, SuccessOrder, llvm::Monotonic);
+ CGF.Builder.CreateBr(ContBB);
+
+ if (AcquireBB) {
+ CGF.Builder.SetInsertPoint(AcquireBB);
+ emitAtomicCmpXchg(CGF, E, Dest, Ptr, Val1, Val2,
+ Size, Align, SuccessOrder, llvm::Acquire);
+ CGF.Builder.CreateBr(ContBB);
+ SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume),
+ AcquireBB);
+ SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire),
+ AcquireBB);
+ }
+ if (SeqCstBB) {
+ CGF.Builder.SetInsertPoint(SeqCstBB);
+ emitAtomicCmpXchg(CGF, E, Dest, Ptr, Val1, Val2,
+ Size, Align, SuccessOrder, llvm::SequentiallyConsistent);
+ CGF.Builder.CreateBr(ContBB);
+ SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst),
+ SeqCstBB);
+ }
+
+ CGF.Builder.SetInsertPoint(ContBB);
+}
+
+static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
+ llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
+ llvm::Value *FailureOrder, uint64_t Size,
+ unsigned Align, llvm::AtomicOrdering Order) {
llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
@@ -198,23 +312,10 @@
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__atomic_compare_exchange:
- case AtomicExpr::AO__atomic_compare_exchange_n: {
- // Note that cmpxchg only supports specifying one ordering and
- // doesn't support weak cmpxchg, at least at the moment.
- llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
- LoadVal1->setAlignment(Align);
- llvm::LoadInst *LoadVal2 = CGF.Builder.CreateLoad(Val2);
- LoadVal2->setAlignment(Align);
- llvm::AtomicCmpXchgInst *CXI =
- CGF.Builder.CreateAtomicCmpXchg(Ptr, LoadVal1, LoadVal2, Order);
- CXI->setVolatile(E->isVolatile());
- llvm::StoreInst *StoreVal1 = CGF.Builder.CreateStore(CXI, Val1);
- StoreVal1->setAlignment(Align);
- llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(CXI, LoadVal1);
- CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
+ case AtomicExpr::AO__atomic_compare_exchange_n:
+ emitAtomicCmpXchgFailureSet(CGF, E, Dest, Ptr, Val1, Val2, FailureOrder,
+ Size, Align, Order);
return;
- }
-
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_load: {
@@ -476,6 +577,8 @@
Args.add(RValue::get(EmitCastToVoidPtr(Ptr)), getContext().VoidPtrTy);
std::string LibCallName;
+ QualType LoweredMemTy =
+ MemTy->isPointerType() ? getContext().getIntPtrType() : MemTy;
QualType RetTy;
bool HaveRetTy = false;
switch (E->getOp()) {
@@ -531,7 +634,7 @@
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
LibCallName = "__atomic_fetch_add";
- AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy,
E->getExprLoc());
break;
// T __atomic_fetch_and_N(T *mem, T val, int order)
@@ -552,7 +655,7 @@
case AtomicExpr::AO__c11_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
LibCallName = "__atomic_fetch_sub";
- AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy,
E->getExprLoc());
break;
// T __atomic_fetch_xor_N(T *mem, T val, int order)
@@ -615,32 +718,32 @@
if (isa<llvm::ConstantInt>(Order)) {
int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
switch (ord) {
- case AO_ABI_memory_order_relaxed:
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::Monotonic);
+ case AtomicExpr::AO_ABI_memory_order_relaxed:
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::Monotonic);
break;
- case AO_ABI_memory_order_consume:
- case AO_ABI_memory_order_acquire:
+ case AtomicExpr::AO_ABI_memory_order_consume:
+ case AtomicExpr::AO_ABI_memory_order_acquire:
if (IsStore)
break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::Acquire);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::Acquire);
break;
- case AO_ABI_memory_order_release:
+ case AtomicExpr::AO_ABI_memory_order_release:
if (IsLoad)
break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::Release);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::Release);
break;
- case AO_ABI_memory_order_acq_rel:
+ case AtomicExpr::AO_ABI_memory_order_acq_rel:
if (IsLoad || IsStore)
break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::AcquireRelease);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::AcquireRelease);
break;
- case AO_ABI_memory_order_seq_cst:
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::SequentiallyConsistent);
+ case AtomicExpr::AO_ABI_memory_order_seq_cst:
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::SequentiallyConsistent);
break;
default: // invalid order
// We should not ever get here normally, but it's hard to
@@ -676,36 +779,41 @@
// Emit all the different atomics
Builder.SetInsertPoint(MonotonicBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::Monotonic);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::Monotonic);
Builder.CreateBr(ContBB);
if (!IsStore) {
Builder.SetInsertPoint(AcquireBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::Acquire);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::Acquire);
Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(1), AcquireBB);
- SI->addCase(Builder.getInt32(2), AcquireBB);
+ SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume),
+ AcquireBB);
+ SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire),
+ AcquireBB);
}
if (!IsLoad) {
Builder.SetInsertPoint(ReleaseBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::Release);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::Release);
Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(3), ReleaseBB);
+ SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_release),
+ ReleaseBB);
}
if (!IsLoad && !IsStore) {
Builder.SetInsertPoint(AcqRelBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::AcquireRelease);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::AcquireRelease);
Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(4), AcqRelBB);
+ SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acq_rel),
+ AcqRelBB);
}
Builder.SetInsertPoint(SeqCstBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
- llvm::SequentiallyConsistent);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+ Size, Align, llvm::SequentiallyConsistent);
Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(5), SeqCstBB);
+ SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst),
+ SeqCstBB);
// Cleanup and return
Builder.SetInsertPoint(ContBB);
@@ -761,8 +869,8 @@
getContext().VoidPtrTy);
args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
getContext().VoidPtrTy);
- args.add(RValue::get(llvm::ConstantInt::get(IntTy,
- AO_ABI_memory_order_seq_cst)),
+ args.add(RValue::get(llvm::ConstantInt::get(
+ IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
getContext().IntTy);
emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);
@@ -911,8 +1019,8 @@
getContext().VoidPtrTy);
args.add(RValue::get(EmitCastToVoidPtr(srcAddr)),
getContext().VoidPtrTy);
- args.add(RValue::get(llvm::ConstantInt::get(IntTy,
- AO_ABI_memory_order_seq_cst)),
+ args.add(RValue::get(llvm::ConstantInt::get(
+ IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
getContext().IntTy);
emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
return;
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 692f9a0..15b08d4 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -18,9 +18,9 @@
#include "CodeGenModule.h"
#include "clang/AST/DeclObjC.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
#include <algorithm>
#include <cstdio>
@@ -53,7 +53,7 @@
return CodeGenFunction(CGM).GenerateCopyHelperFunction(blockInfo);
}
-/// Build the helper function to dipose of a block.
+/// Build the helper function to dispose of a block.
static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM,
const CGBlockInfo &blockInfo) {
return CodeGenFunction(CGM).GenerateDestroyHelperFunction(blockInfo);
@@ -300,8 +300,8 @@
// The header is basically a 'struct { void *; int; int; void *; void *; }'.
CharUnits ptrSize, ptrAlign, intSize, intAlign;
- llvm::tie(ptrSize, ptrAlign) = C.getTypeInfoInChars(C.VoidPtrTy);
- llvm::tie(intSize, intAlign) = C.getTypeInfoInChars(C.IntTy);
+ std::tie(ptrSize, ptrAlign) = C.getTypeInfoInChars(C.VoidPtrTy);
+ std::tie(intSize, intAlign) = C.getTypeInfoInChars(C.IntTy);
// Are there crazy embedded platforms where this isn't true?
assert(intSize <= ptrSize && "layout assumptions horribly violated");
@@ -370,11 +370,10 @@
}
// Next, all the block captures.
- for (BlockDecl::capture_const_iterator ci = block->capture_begin(),
- ce = block->capture_end(); ci != ce; ++ci) {
- const VarDecl *variable = ci->getVariable();
+ for (const auto &CI : block->captures()) {
+ const VarDecl *variable = CI.getVariable();
- if (ci->isByRef()) {
+ if (CI.isByRef()) {
// We have to copy/dispose of the __block reference.
info.NeedsCopyDispose = true;
@@ -387,8 +386,7 @@
maxFieldAlign = std::max(maxFieldAlign, tinfo.second);
layout.push_back(BlockLayoutChunk(tinfo.second, tinfo.first,
- Qualifiers::OCL_None,
- &*ci, llvmType));
+ Qualifiers::OCL_None, &CI, llvmType));
continue;
}
@@ -422,7 +420,7 @@
lifetime = Qualifiers::OCL_Strong;
// So do types that require non-trivial copy construction.
- } else if (ci->hasCopyExpr()) {
+ } else if (CI.hasCopyExpr()) {
info.NeedsCopyDispose = true;
info.HasCXXObject = true;
@@ -446,7 +444,7 @@
llvm::Type *llvmType =
CGM.getTypes().ConvertTypeForMem(VT);
- layout.push_back(BlockLayoutChunk(align, size, lifetime, &*ci, llvmType));
+ layout.push_back(BlockLayoutChunk(align, size, lifetime, &CI, llvmType));
}
// If that was everything, we're done here.
@@ -581,14 +579,13 @@
// Walk through the captures (in order) and find the ones not
// captured by constant.
- for (BlockDecl::capture_const_iterator ci = block->capture_begin(),
- ce = block->capture_end(); ci != ce; ++ci) {
+ for (const auto &CI : block->captures()) {
// Ignore __block captures; there's nothing special in the
// on-stack block that we need to do for them.
- if (ci->isByRef()) continue;
+ if (CI.isByRef()) continue;
// Ignore variables that are constant-captured.
- const VarDecl *variable = ci->getVariable();
+ const VarDecl *variable = CI.getVariable();
CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
if (capture.isConstant()) continue;
@@ -679,7 +676,7 @@
}
// Find the block info for this block and take ownership of it.
- OwningPtr<CGBlockInfo> blockInfo;
+ std::unique_ptr<CGBlockInfo> blockInfo;
blockInfo.reset(findAndRemoveBlockInfo(&FirstBlockInfo,
blockExpr->getBlockDecl()));
@@ -741,9 +738,8 @@
}
// Next, captured variables.
- for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(),
- ce = blockDecl->capture_end(); ci != ce; ++ci) {
- const VarDecl *variable = ci->getVariable();
+ for (const auto &CI : blockDecl->captures()) {
+ const VarDecl *variable = CI.getVariable();
const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
// Ignore constant captures.
@@ -761,7 +757,7 @@
// Compute the address of the thing we're going to move into the
// block literal.
llvm::Value *src;
- if (BlockInfo && ci->isNested()) {
+ if (BlockInfo && CI.isNested()) {
// We need to use the capture from the enclosing block.
const CGBlockInfo::Capture &enclosingCapture =
BlockInfo->getCapture(variable);
@@ -780,8 +776,8 @@
// emission.
src = LocalDeclMap.lookup(variable);
if (!src) {
- DeclRefExpr declRef(const_cast<VarDecl*>(variable),
- /*refersToEnclosing*/ ci->isNested(), type,
+ DeclRefExpr declRef(const_cast<VarDecl *>(variable),
+ /*refersToEnclosing*/ CI.isNested(), type,
VK_LValue, SourceLocation());
src = EmitDeclRefLValue(&declRef).getAddress();
}
@@ -791,9 +787,9 @@
// the block field. There's no need to chase the forwarding
// pointer at this point, since we're building something that will
// live a shorter life than the stack byref anyway.
- if (ci->isByRef()) {
+ if (CI.isByRef()) {
// Get a void* that points to the byref struct.
- if (ci->isNested())
+ if (CI.isNested())
src = Builder.CreateAlignedLoad(src, align.getQuantity(),
"byref.capture");
else
@@ -803,7 +799,7 @@
Builder.CreateAlignedStore(src, blockField, align.getQuantity());
// If we have a copy constructor, evaluate that into the block field.
- } else if (const Expr *copyExpr = ci->getCopyExpr()) {
+ } else if (const Expr *copyExpr = CI.getCopyExpr()) {
if (blockDecl->isConversionFromLambda()) {
// If we have a lambda conversion, emit the expression
// directly into the block instead.
@@ -851,7 +847,7 @@
// We use one of these or the other depending on whether the
// reference is nested.
DeclRefExpr declRef(const_cast<VarDecl*>(variable),
- /*refersToEnclosing*/ ci->isNested(), type,
+ /*refersToEnclosing*/ CI.isNested(), type,
VK_LValue, SourceLocation());
ImplicitCastExpr l2r(ImplicitCastExpr::OnStack, type, CK_LValueToRValue,
@@ -862,7 +858,7 @@
}
// Activate the cleanup if layout pushed one.
- if (!ci->isByRef()) {
+ if (!CI.isByRef()) {
EHScopeStack::stable_iterator cleanup = capture.getCleanup();
if (cleanup.isValid())
ActivateCleanupBlock(cleanup, blockInfo.DominatingIP);
@@ -1117,17 +1113,15 @@
args.push_back(&selfDecl);
// Now add the rest of the parameters.
- for (BlockDecl::param_const_iterator i = blockDecl->param_begin(),
- e = blockDecl->param_end(); i != e; ++i)
- args.push_back(*i);
+ for (auto i : blockDecl->params())
+ args.push_back(i);
// Create the function declaration.
const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType();
- const CGFunctionInfo &fnInfo =
- CGM.getTypes().arrangeFunctionDeclaration(fnType->getResultType(), args,
- fnType->getExtInfo(),
- fnType->isVariadic());
- if (CGM.ReturnTypeUsesSRet(fnInfo))
+ const CGFunctionInfo &fnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration(
+ fnType->getReturnType(), args, fnType->getExtInfo(),
+ fnType->isVariadic());
+ if (CGM.ReturnSlotInterferesWithArgs(fnInfo))
blockInfo.UsesStret = true;
llvm::FunctionType *fnLLVMType = CGM.getTypes().GetFunctionType(fnInfo);
@@ -1140,7 +1134,7 @@
CGM.SetInternalFunctionAttributes(blockDecl, fn, fnInfo);
// Begin generating the function.
- StartFunction(blockDecl, fnType->getResultType(), fn, fnInfo, args,
+ StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args,
blockInfo.getBlockExpr()->getBody()->getLocStart());
// Okay. Undo some of what StartFunction did.
@@ -1177,9 +1171,8 @@
}
// Also force all the constant captures.
- for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(),
- ce = blockDecl->capture_end(); ci != ce; ++ci) {
- const VarDecl *variable = ci->getVariable();
+ for (const auto &CI : blockDecl->captures()) {
+ const VarDecl *variable = CI.getVariable();
const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
if (!capture.isConstant()) continue;
@@ -1201,8 +1194,14 @@
if (IsLambdaConversionToBlock)
EmitLambdaBlockInvokeBody();
- else
+ else {
+ PGO.assignRegionCounters(blockDecl, fn);
+ RegionCounter Cnt = getPGORegionCounter(blockDecl->getBody());
+ Cnt.beginRegion(Builder);
EmitStmt(blockDecl->getBody());
+ PGO.emitInstrumentationData();
+ PGO.destroyRegionCounters();
+ }
// Remember where we were...
llvm::BasicBlock *resume = Builder.GetInsertBlock();
@@ -1214,9 +1213,8 @@
// Emit debug information for all the DeclRefExprs.
// FIXME: also for 'this'
if (CGDebugInfo *DI = getDebugInfo()) {
- for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(),
- ce = blockDecl->capture_end(); ci != ce; ++ci) {
- const VarDecl *variable = ci->getVariable();
+ for (const auto &CI : blockDecl->captures()) {
+ const VarDecl *variable = CI.getVariable();
DI->EmitLocation(Builder, variable->getLocation());
if (CGM.getCodeGenOpts().getDebugInfo()
@@ -1285,10 +1283,8 @@
ImplicitParamDecl srcDecl(0, SourceLocation(), 0, C.VoidPtrTy);
args.push_back(&srcDecl);
- const CGFunctionInfo &FI =
- CGM.getTypes().arrangeFunctionDeclaration(C.VoidTy, args,
- FunctionType::ExtInfo(),
- /*variadic*/ false);
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+ C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false);
// FIXME: it would be nice if these were mergeable with things with
// identical semantics.
@@ -1325,25 +1321,24 @@
const BlockDecl *blockDecl = blockInfo.getBlockDecl();
- for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(),
- ce = blockDecl->capture_end(); ci != ce; ++ci) {
- const VarDecl *variable = ci->getVariable();
+ for (const auto &CI : blockDecl->captures()) {
+ const VarDecl *variable = CI.getVariable();
QualType type = variable->getType();
const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
if (capture.isConstant()) continue;
- const Expr *copyExpr = ci->getCopyExpr();
+ const Expr *copyExpr = CI.getCopyExpr();
BlockFieldFlags flags;
bool useARCWeakCopy = false;
bool useARCStrongCopy = false;
if (copyExpr) {
- assert(!ci->isByRef());
+ assert(!CI.isByRef());
// don't bother computing flags
- } else if (ci->isByRef()) {
+ } else if (CI.isByRef()) {
flags = BLOCK_FIELD_IS_BYREF;
if (type.isObjCGCWeak())
flags |= BLOCK_FIELD_IS_WEAK;
@@ -1423,7 +1418,7 @@
};
bool copyCanThrow = false;
- if (ci->isByRef() && variable->getType()->getAsCXXRecordDecl()) {
+ if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) {
const Expr *copyExpr =
CGM.getContext().getBlockVarCopyInits(variable);
if (copyExpr) {
@@ -1460,10 +1455,8 @@
ImplicitParamDecl srcDecl(0, SourceLocation(), 0, C.VoidPtrTy);
args.push_back(&srcDecl);
- const CGFunctionInfo &FI =
- CGM.getTypes().arrangeFunctionDeclaration(C.VoidTy, args,
- FunctionType::ExtInfo(),
- /*variadic*/ false);
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+ C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false);
// FIXME: We'd like to put these into a mergable by content, with
// internal linkage.
@@ -1496,9 +1489,8 @@
CodeGenFunction::RunCleanupsScope cleanups(*this);
- for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(),
- ce = blockDecl->capture_end(); ci != ce; ++ci) {
- const VarDecl *variable = ci->getVariable();
+ for (const auto &CI : blockDecl->captures()) {
+ const VarDecl *variable = CI.getVariable();
QualType type = variable->getType();
const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
@@ -1510,7 +1502,7 @@
bool useARCWeakDestroy = false;
bool useARCStrongDestroy = false;
- if (ci->isByRef()) {
+ if (CI.isByRef()) {
flags = BLOCK_FIELD_IS_BYREF;
if (type.isObjCGCWeak())
flags |= BLOCK_FIELD_IS_WEAK;
@@ -1587,7 +1579,7 @@
: ByrefHelpers(alignment), Flags(flags) {}
void emitCopy(CodeGenFunction &CGF, llvm::Value *destField,
- llvm::Value *srcField) {
+ llvm::Value *srcField) override {
destField = CGF.Builder.CreateBitCast(destField, CGF.VoidPtrTy);
srcField = CGF.Builder.CreateBitCast(srcField, CGF.VoidPtrPtrTy);
@@ -1602,14 +1594,14 @@
CGF.EmitNounwindRuntimeCall(fn, args);
}
- void emitDispose(CodeGenFunction &CGF, llvm::Value *field) {
+ void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override {
field = CGF.Builder.CreateBitCast(field, CGF.Int8PtrTy->getPointerTo(0));
llvm::Value *value = CGF.Builder.CreateLoad(field);
CGF.BuildBlockRelease(value, Flags | BLOCK_BYREF_CALLER);
}
- void profileImpl(llvm::FoldingSetNodeID &id) const {
+ void profileImpl(llvm::FoldingSetNodeID &id) const override {
id.AddInteger(Flags.getBitMask());
}
};
@@ -1620,15 +1612,15 @@
ARCWeakByrefHelpers(CharUnits alignment) : ByrefHelpers(alignment) {}
void emitCopy(CodeGenFunction &CGF, llvm::Value *destField,
- llvm::Value *srcField) {
+ llvm::Value *srcField) override {
CGF.EmitARCMoveWeak(destField, srcField);
}
- void emitDispose(CodeGenFunction &CGF, llvm::Value *field) {
+ void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override {
CGF.EmitARCDestroyWeak(field);
}
- void profileImpl(llvm::FoldingSetNodeID &id) const {
+ void profileImpl(llvm::FoldingSetNodeID &id) const override {
// 0 is distinguishable from all pointers and byref flags
id.AddInteger(0);
}
@@ -1641,7 +1633,7 @@
ARCStrongByrefHelpers(CharUnits alignment) : ByrefHelpers(alignment) {}
void emitCopy(CodeGenFunction &CGF, llvm::Value *destField,
- llvm::Value *srcField) {
+ llvm::Value *srcField) override {
// Do a "move" by copying the value and then zeroing out the old
// variable.
@@ -1665,11 +1657,11 @@
store->setAlignment(Alignment.getQuantity());
}
- void emitDispose(CodeGenFunction &CGF, llvm::Value *field) {
+ void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override {
CGF.EmitARCDestroyStrong(field, ARCImpreciseLifetime);
}
- void profileImpl(llvm::FoldingSetNodeID &id) const {
+ void profileImpl(llvm::FoldingSetNodeID &id) const override {
// 1 is distinguishable from all pointers and byref flags
id.AddInteger(1);
}
@@ -1682,7 +1674,7 @@
ARCStrongBlockByrefHelpers(CharUnits alignment) : ByrefHelpers(alignment) {}
void emitCopy(CodeGenFunction &CGF, llvm::Value *destField,
- llvm::Value *srcField) {
+ llvm::Value *srcField) override {
// Do the copy with objc_retainBlock; that's all that
// _Block_object_assign would do anyway, and we'd have to pass the
// right arguments to make sure it doesn't get no-op'ed.
@@ -1695,11 +1687,11 @@
store->setAlignment(Alignment.getQuantity());
}
- void emitDispose(CodeGenFunction &CGF, llvm::Value *field) {
+ void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override {
CGF.EmitARCDestroyStrong(field, ARCImpreciseLifetime);
}
- void profileImpl(llvm::FoldingSetNodeID &id) const {
+ void profileImpl(llvm::FoldingSetNodeID &id) const override {
// 2 is distinguishable from all pointers and byref flags
id.AddInteger(2);
}
@@ -1716,20 +1708,20 @@
const Expr *copyExpr)
: ByrefHelpers(alignment), VarType(type), CopyExpr(copyExpr) {}
- bool needsCopy() const { return CopyExpr != 0; }
+ bool needsCopy() const override { return CopyExpr != 0; }
void emitCopy(CodeGenFunction &CGF, llvm::Value *destField,
- llvm::Value *srcField) {
+ llvm::Value *srcField) override {
if (!CopyExpr) return;
CGF.EmitSynthesizedCXXCopyCtor(destField, srcField, CopyExpr);
}
- void emitDispose(CodeGenFunction &CGF, llvm::Value *field) {
+ void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override {
EHScopeStack::stable_iterator cleanupDepth = CGF.EHStack.stable_begin();
CGF.PushDestructorCleanup(VarType, field);
CGF.PopCleanupBlocks(cleanupDepth);
}
- void profileImpl(llvm::FoldingSetNodeID &id) const {
+ void profileImpl(llvm::FoldingSetNodeID &id) const override {
id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr());
}
};
@@ -1751,10 +1743,8 @@
ImplicitParamDecl src(0, SourceLocation(), 0, Context.VoidPtrTy);
args.push_back(&src);
- const CGFunctionInfo &FI =
- CGF.CGM.getTypes().arrangeFunctionDeclaration(R, args,
- FunctionType::ExtInfo(),
- /*variadic*/ false);
+ const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration(
+ R, args, FunctionType::ExtInfo(), /*variadic=*/false);
CodeGenTypes &Types = CGF.CGM.getTypes();
llvm::FunctionType *LTy = Types.GetFunctionType(FI);
@@ -1822,10 +1812,8 @@
ImplicitParamDecl src(0, SourceLocation(), 0, Context.VoidPtrTy);
args.push_back(&src);
- const CGFunctionInfo &FI =
- CGF.CGM.getTypes().arrangeFunctionDeclaration(R, args,
- FunctionType::ExtInfo(),
- /*variadic*/ false);
+ const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration(
+ R, args, FunctionType::ExtInfo(), /*variadic=*/false);
CodeGenTypes &Types = CGF.CGM.getTypes();
llvm::FunctionType *LTy = Types.GetFunctionType(FI);
@@ -2218,7 +2206,7 @@
llvm::Value *Addr;
CallBlockRelease(llvm::Value *Addr) : Addr(Addr) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
// Should we be passing FIELD_IS_WEAK here?
CGF.BuildBlockRelease(Addr, BLOCK_FIELD_IS_BYREF);
}
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 7726ad3..5a86bdd 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -215,8 +215,11 @@
return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
case Builtin::BI__builtin_stdarg_start:
case Builtin::BI__builtin_va_start:
+ case Builtin::BI__va_start:
case Builtin::BI__builtin_va_end: {
- Value *ArgValue = EmitVAListRef(E->getArg(0));
+ Value *ArgValue = (BuiltinID == Builtin::BI__va_start)
+ ? EmitScalarExpr(E->getArg(0))
+ : EmitVAListRef(E->getArg(0));
llvm::Type *DestType = Int8PtrTy;
if (ArgValue->getType() != DestType)
ArgValue = Builder.CreateBitCast(ArgValue, DestType,
@@ -429,6 +432,12 @@
Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
return RValue::get(Builder.CreateCall(F));
}
+ case Builtin::BI__builtin___clear_cache: {
+ Value *Begin = EmitScalarExpr(E->getArg(0));
+ Value *End = EmitScalarExpr(E->getArg(1));
+ Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
+ return RValue::get(Builder.CreateCall2(F, Begin, End));
+ }
case Builtin::BI__builtin_trap: {
Value *F = CGM.getIntrinsic(Intrinsic::trap);
return RValue::get(Builder.CreateCall(F));
@@ -964,6 +973,7 @@
Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
+ llvm::SequentiallyConsistent,
llvm::SequentiallyConsistent);
Result = EmitFromInt(*this, Result, T, ValueType);
return RValue::get(Result);
@@ -990,6 +1000,7 @@
Value *OldVal = Args[1];
Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
+ llvm::SequentiallyConsistent,
llvm::SequentiallyConsistent);
Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
// zext bool to int.
@@ -1311,7 +1322,6 @@
llvm::Type *ArgType = Base->getType();
Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
return RValue::get(Builder.CreateCall2(F, Base, Exponent));
- break;
}
case Builtin::BIfma:
@@ -1500,6 +1510,43 @@
return RValue::get(EmitLValue(E->getArg(0)).getAddress());
case Builtin::BI__noop:
return RValue::get(0);
+ case Builtin::BI_InterlockedCompareExchange: {
+ AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
+ EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(2)),
+ EmitScalarExpr(E->getArg(1)),
+ SequentiallyConsistent,
+ SequentiallyConsistent);
+ CXI->setVolatile(true);
+ return RValue::get(CXI);
+ }
+ case Builtin::BI_InterlockedIncrement: {
+ AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Add,
+ EmitScalarExpr(E->getArg(0)),
+ ConstantInt::get(Int32Ty, 1),
+ llvm::SequentiallyConsistent);
+ RMWI->setVolatile(true);
+ return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1)));
+ }
+ case Builtin::BI_InterlockedDecrement: {
+ AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Sub,
+ EmitScalarExpr(E->getArg(0)),
+ ConstantInt::get(Int32Ty, 1),
+ llvm::SequentiallyConsistent);
+ RMWI->setVolatile(true);
+ return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1)));
+ }
+ case Builtin::BI_InterlockedExchangeAdd: {
+ AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Add,
+ EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1)),
+ llvm::SequentiallyConsistent);
+ RMWI->setVolatile(true);
+ return RValue::get(RMWI);
+ }
}
// If this is an alias for a lib function (e.g. __builtin_sin), emit
@@ -1591,10 +1638,15 @@
const CallExpr *E) {
switch (getTarget().getTriple().getArch()) {
case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
return EmitAArch64BuiltinExpr(BuiltinID, E);
case llvm::Triple::arm:
+ case llvm::Triple::armeb:
case llvm::Triple::thumb:
+ case llvm::Triple::thumbeb:
return EmitARMBuiltinExpr(BuiltinID, E);
+ case llvm::Triple::arm64:
+ return EmitARM64BuiltinExpr(BuiltinID, E);
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return EmitX86BuiltinExpr(BuiltinID, E);
@@ -1624,6 +1676,11 @@
case NeonTypeFlags::Int64:
case NeonTypeFlags::Poly64:
return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
+ case NeonTypeFlags::Poly128:
+ // FIXME: i128 and f128 don't get full support in Clang and LLVM.
+ // There is a lot of i128 and f128 API missing,
+ // so we use v16i8 to represent poly128 and get it pattern matched.
+ return llvm::VectorType::get(CGF->Int8Ty, 16);
case NeonTypeFlags::Float32:
return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
case NeonTypeFlags::Float64:
@@ -1694,6 +1751,36 @@
return Builder.CreateAShr(Vec, Shift, name);
}
+Value *CodeGenFunction::EmitConcatVectors(Value *Lo, Value *Hi,
+ llvm::Type *ArgTy) {
+ unsigned NumElts = ArgTy->getVectorNumElements();
+ SmallVector<Constant *, 16> Indices;
+ for (unsigned i = 0; i < 2 * NumElts; ++i)
+ Indices.push_back(ConstantInt::get(Int32Ty, i));
+
+ Constant *Mask = ConstantVector::get(Indices);
+ Value *LoCast = Builder.CreateBitCast(Lo, ArgTy);
+ Value *HiCast = Builder.CreateBitCast(Hi, ArgTy);
+ return Builder.CreateShuffleVector(LoCast, HiCast, Mask, "concat");
+}
+
+Value *CodeGenFunction::EmitExtractHigh(Value *Vec, llvm::Type *ResTy) {
+ unsigned NumElts = ResTy->getVectorNumElements();
+ SmallVector<Constant *, 8> Indices;
+
+ llvm::Type *InTy = llvm::VectorType::get(ResTy->getVectorElementType(),
+ NumElts * 2);
+ Value *VecCast = Builder.CreateBitCast(Vec, InTy);
+
+ // extract_high is a shuffle on the second half of the input indices: E.g. 4,
+ // 5, 6, 7 if we're extracting <4 x i16> from <8 x i16>.
+ for (unsigned i = 0; i < NumElts; ++i)
+ Indices.push_back(ConstantInt::get(Int32Ty, NumElts + i));
+
+ Constant *Mask = ConstantVector::get(Indices);
+ return Builder.CreateShuffleVector(VecCast, VecCast, Mask, "concat");
+}
+
/// GetPointeeAlignment - Given an expression with a pointer type, find the
/// alignment of the type referenced by the pointer. Skip over implicit
/// casts.
@@ -1749,21 +1836,1044 @@
return std::make_pair(EmitScalarExpr(Addr), Align);
}
+enum {
+ AddRetType = (1 << 0),
+ Add1ArgType = (1 << 1),
+ Add2ArgTypes = (1 << 2),
+
+ VectorizeRetType = (1 << 3),
+ VectorizeArgTypes = (1 << 4),
+
+ InventFloatType = (1 << 5),
+ UnsignedAlts = (1 << 6),
+
+ Use64BitVectors = (1 << 7),
+ Use128BitVectors = (1 << 8),
+
+ Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
+ VectorRet = AddRetType | VectorizeRetType,
+ VectorRetGetArgs01 =
+ AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
+ FpCmpzModifiers =
+ AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
+};
+
+ struct NeonIntrinsicInfo {
+ unsigned BuiltinID;
+ unsigned LLVMIntrinsic;
+ unsigned AltLLVMIntrinsic;
+ const char *NameHint;
+ unsigned TypeModifier;
+
+ bool operator<(unsigned RHSBuiltinID) const {
+ return BuiltinID < RHSBuiltinID;
+ }
+};
+
+#define NEONMAP0(NameBase) \
+ { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 }
+
+#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
+ { NEON:: BI__builtin_neon_ ## NameBase, \
+ Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier }
+
+#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
+ { NEON:: BI__builtin_neon_ ## NameBase, \
+ Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
+ #NameBase, TypeModifier }
+
+static const NeonIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
+ NEONMAP1(vabdd_f64, aarch64_neon_vabd, AddRetType),
+ NEONMAP1(vabds_f32, aarch64_neon_vabd, AddRetType),
+ NEONMAP1(vabsd_s64, aarch64_neon_vabs, 0),
+ NEONMAP1(vaddd_s64, aarch64_neon_vaddds, 0),
+ NEONMAP1(vaddd_u64, aarch64_neon_vadddu, 0),
+ NEONMAP1(vaddlv_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_s64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_u64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vcaged_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcages_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcagtd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcagts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcaled_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcales_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcaltd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcalts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vceqd_f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
+ NEONMAP1(vceqd_s64, aarch64_neon_vceq, VectorRetGetArgs01),
+ NEONMAP1(vceqd_u64, aarch64_neon_vceq, VectorRetGetArgs01),
+ NEONMAP1(vceqs_f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
+ NEONMAP1(vceqzd_f64, aarch64_neon_fceq, FpCmpzModifiers),
+ NEONMAP1(vceqzd_s64, aarch64_neon_vceq, VectorRetGetArgs01),
+ NEONMAP1(vceqzd_u64, aarch64_neon_vceq, VectorRetGetArgs01),
+ NEONMAP1(vceqzs_f32, aarch64_neon_fceq, FpCmpzModifiers),
+ NEONMAP1(vcged_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcged_s64, aarch64_neon_vcge, VectorRetGetArgs01),
+ NEONMAP1(vcged_u64, aarch64_neon_vchs, VectorRetGetArgs01),
+ NEONMAP1(vcges_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcgezd_f64, aarch64_neon_fcge, FpCmpzModifiers),
+ NEONMAP1(vcgezd_s64, aarch64_neon_vcge, VectorRetGetArgs01),
+ NEONMAP1(vcgezs_f32, aarch64_neon_fcge, FpCmpzModifiers),
+ NEONMAP1(vcgtd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcgtd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
+ NEONMAP1(vcgtd_u64, aarch64_neon_vchi, VectorRetGetArgs01),
+ NEONMAP1(vcgts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcgtzd_f64, aarch64_neon_fcgt, FpCmpzModifiers),
+ NEONMAP1(vcgtzd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
+ NEONMAP1(vcgtzs_f32, aarch64_neon_fcgt, FpCmpzModifiers),
+ NEONMAP1(vcled_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcled_s64, aarch64_neon_vcge, VectorRetGetArgs01),
+ NEONMAP1(vcled_u64, aarch64_neon_vchs, VectorRetGetArgs01),
+ NEONMAP1(vcles_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
+ NEONMAP1(vclezd_f64, aarch64_neon_fclez, FpCmpzModifiers),
+ NEONMAP1(vclezd_s64, aarch64_neon_vclez, VectorRetGetArgs01),
+ NEONMAP1(vclezs_f32, aarch64_neon_fclez, FpCmpzModifiers),
+ NEONMAP1(vcltd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcltd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
+ NEONMAP1(vcltd_u64, aarch64_neon_vchi, VectorRetGetArgs01),
+ NEONMAP1(vclts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcltzd_f64, aarch64_neon_fcltz, FpCmpzModifiers),
+ NEONMAP1(vcltzd_s64, aarch64_neon_vcltz, VectorRetGetArgs01),
+ NEONMAP1(vcltzs_f32, aarch64_neon_fcltz, FpCmpzModifiers),
+ NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtd_f64_s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvtd_f64_u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvts_f32_s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvts_f32_u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+ NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+ NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+ NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtxd_f32_f64, aarch64_neon_fcvtxn, 0),
+ NEONMAP0(vdupb_lane_i8),
+ NEONMAP0(vdupb_laneq_i8),
+ NEONMAP0(vdupd_lane_f64),
+ NEONMAP0(vdupd_lane_i64),
+ NEONMAP0(vdupd_laneq_f64),
+ NEONMAP0(vdupd_laneq_i64),
+ NEONMAP0(vduph_lane_i16),
+ NEONMAP0(vduph_laneq_i16),
+ NEONMAP0(vdups_lane_f32),
+ NEONMAP0(vdups_lane_i32),
+ NEONMAP0(vdups_laneq_f32),
+ NEONMAP0(vdups_laneq_i32),
+ NEONMAP0(vfmad_lane_f64),
+ NEONMAP0(vfmad_laneq_f64),
+ NEONMAP0(vfmas_lane_f32),
+ NEONMAP0(vfmas_laneq_f32),
+ NEONMAP0(vget_lane_f32),
+ NEONMAP0(vget_lane_f64),
+ NEONMAP0(vget_lane_i16),
+ NEONMAP0(vget_lane_i32),
+ NEONMAP0(vget_lane_i64),
+ NEONMAP0(vget_lane_i8),
+ NEONMAP0(vgetq_lane_f32),
+ NEONMAP0(vgetq_lane_f64),
+ NEONMAP0(vgetq_lane_i16),
+ NEONMAP0(vgetq_lane_i32),
+ NEONMAP0(vgetq_lane_i64),
+ NEONMAP0(vgetq_lane_i8),
+ NEONMAP1(vmaxnmv_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxnmvq_f32, aarch64_neon_vmaxnmv, 0),
+ NEONMAP1(vmaxnmvq_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_f32, aarch64_neon_vmaxv, 0),
+ NEONMAP1(vmaxvq_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vminnmv_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ NEONMAP1(vminnmvq_f32, aarch64_neon_vminnmv, 0),
+ NEONMAP1(vminnmvq_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_f32, aarch64_neon_vminv, 0),
+ NEONMAP1(vminvq_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP0(vmul_n_f64),
+ NEONMAP1(vmull_p64, aarch64_neon_vmull_p64, 0),
+ NEONMAP0(vmulxd_f64),
+ NEONMAP0(vmulxs_f32),
+ NEONMAP1(vnegd_s64, aarch64_neon_vneg, 0),
+ NEONMAP1(vpaddd_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vpaddd_s64, aarch64_neon_vpadd, 0),
+ NEONMAP1(vpaddd_u64, aarch64_neon_vpadd, 0),
+ NEONMAP1(vpadds_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxnmqd_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxnms_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxqd_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxs_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ NEONMAP1(vpminnmqd_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ NEONMAP1(vpminnms_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ NEONMAP1(vpminqd_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ NEONMAP1(vpmins_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ NEONMAP1(vqabsb_s8, arm_neon_vqabs, VectorRet),
+ NEONMAP1(vqabsd_s64, arm_neon_vqabs, VectorRet),
+ NEONMAP1(vqabsh_s16, arm_neon_vqabs, VectorRet),
+ NEONMAP1(vqabss_s32, arm_neon_vqabs, VectorRet),
+ NEONMAP1(vqaddb_s8, arm_neon_vqadds, VectorRet),
+ NEONMAP1(vqaddb_u8, arm_neon_vqaddu, VectorRet),
+ NEONMAP1(vqaddd_s64, arm_neon_vqadds, VectorRet),
+ NEONMAP1(vqaddd_u64, arm_neon_vqaddu, VectorRet),
+ NEONMAP1(vqaddh_s16, arm_neon_vqadds, VectorRet),
+ NEONMAP1(vqaddh_u16, arm_neon_vqaddu, VectorRet),
+ NEONMAP1(vqadds_s32, arm_neon_vqadds, VectorRet),
+ NEONMAP1(vqadds_u32, arm_neon_vqaddu, VectorRet),
+ NEONMAP0(vqdmlalh_lane_s16),
+ NEONMAP0(vqdmlalh_laneq_s16),
+ NEONMAP1(vqdmlalh_s16, aarch64_neon_vqdmlal, VectorRet),
+ NEONMAP0(vqdmlals_lane_s32),
+ NEONMAP0(vqdmlals_laneq_s32),
+ NEONMAP1(vqdmlals_s32, aarch64_neon_vqdmlal, VectorRet),
+ NEONMAP0(vqdmlslh_lane_s16),
+ NEONMAP0(vqdmlslh_laneq_s16),
+ NEONMAP1(vqdmlslh_s16, aarch64_neon_vqdmlsl, VectorRet),
+ NEONMAP0(vqdmlsls_lane_s32),
+ NEONMAP0(vqdmlsls_laneq_s32),
+ NEONMAP1(vqdmlsls_s32, aarch64_neon_vqdmlsl, VectorRet),
+ NEONMAP1(vqdmulhh_s16, arm_neon_vqdmulh, VectorRet),
+ NEONMAP1(vqdmulhs_s32, arm_neon_vqdmulh, VectorRet),
+ NEONMAP1(vqdmullh_s16, arm_neon_vqdmull, VectorRet),
+ NEONMAP1(vqdmulls_s32, arm_neon_vqdmull, VectorRet),
+ NEONMAP1(vqmovnd_s64, arm_neon_vqmovns, VectorRet),
+ NEONMAP1(vqmovnd_u64, arm_neon_vqmovnu, VectorRet),
+ NEONMAP1(vqmovnh_s16, arm_neon_vqmovns, VectorRet),
+ NEONMAP1(vqmovnh_u16, arm_neon_vqmovnu, VectorRet),
+ NEONMAP1(vqmovns_s32, arm_neon_vqmovns, VectorRet),
+ NEONMAP1(vqmovns_u32, arm_neon_vqmovnu, VectorRet),
+ NEONMAP1(vqmovund_s64, arm_neon_vqmovnsu, VectorRet),
+ NEONMAP1(vqmovunh_s16, arm_neon_vqmovnsu, VectorRet),
+ NEONMAP1(vqmovuns_s32, arm_neon_vqmovnsu, VectorRet),
+ NEONMAP1(vqnegb_s8, arm_neon_vqneg, VectorRet),
+ NEONMAP1(vqnegd_s64, arm_neon_vqneg, VectorRet),
+ NEONMAP1(vqnegh_s16, arm_neon_vqneg, VectorRet),
+ NEONMAP1(vqnegs_s32, arm_neon_vqneg, VectorRet),
+ NEONMAP1(vqrdmulhh_s16, arm_neon_vqrdmulh, VectorRet),
+ NEONMAP1(vqrdmulhs_s32, arm_neon_vqrdmulh, VectorRet),
+ NEONMAP1(vqrshlb_s8, aarch64_neon_vqrshls, VectorRet),
+ NEONMAP1(vqrshlb_u8, aarch64_neon_vqrshlu, VectorRet),
+ NEONMAP1(vqrshld_s64, aarch64_neon_vqrshls, VectorRet),
+ NEONMAP1(vqrshld_u64, aarch64_neon_vqrshlu, VectorRet),
+ NEONMAP1(vqrshlh_s16, aarch64_neon_vqrshls, VectorRet),
+ NEONMAP1(vqrshlh_u16, aarch64_neon_vqrshlu, VectorRet),
+ NEONMAP1(vqrshls_s32, aarch64_neon_vqrshls, VectorRet),
+ NEONMAP1(vqrshls_u32, aarch64_neon_vqrshlu, VectorRet),
+ NEONMAP1(vqrshrnd_n_s64, aarch64_neon_vsqrshrn, VectorRet),
+ NEONMAP1(vqrshrnd_n_u64, aarch64_neon_vuqrshrn, VectorRet),
+ NEONMAP1(vqrshrnh_n_s16, aarch64_neon_vsqrshrn, VectorRet),
+ NEONMAP1(vqrshrnh_n_u16, aarch64_neon_vuqrshrn, VectorRet),
+ NEONMAP1(vqrshrns_n_s32, aarch64_neon_vsqrshrn, VectorRet),
+ NEONMAP1(vqrshrns_n_u32, aarch64_neon_vuqrshrn, VectorRet),
+ NEONMAP1(vqrshrund_n_s64, aarch64_neon_vsqrshrun, VectorRet),
+ NEONMAP1(vqrshrunh_n_s16, aarch64_neon_vsqrshrun, VectorRet),
+ NEONMAP1(vqrshruns_n_s32, aarch64_neon_vsqrshrun, VectorRet),
+ NEONMAP1(vqshlb_n_s8, aarch64_neon_vqshls_n, VectorRet),
+ NEONMAP1(vqshlb_n_u8, aarch64_neon_vqshlu_n, VectorRet),
+ NEONMAP1(vqshlb_s8, aarch64_neon_vqshls, VectorRet),
+ NEONMAP1(vqshlb_u8, aarch64_neon_vqshlu, VectorRet),
+ NEONMAP1(vqshld_n_s64, aarch64_neon_vqshls_n, VectorRet),
+ NEONMAP1(vqshld_n_u64, aarch64_neon_vqshlu_n, VectorRet),
+ NEONMAP1(vqshld_s64, aarch64_neon_vqshls, VectorRet),
+ NEONMAP1(vqshld_u64, aarch64_neon_vqshlu, VectorRet),
+ NEONMAP1(vqshlh_n_s16, aarch64_neon_vqshls_n, VectorRet),
+ NEONMAP1(vqshlh_n_u16, aarch64_neon_vqshlu_n, VectorRet),
+ NEONMAP1(vqshlh_s16, aarch64_neon_vqshls, VectorRet),
+ NEONMAP1(vqshlh_u16, aarch64_neon_vqshlu, VectorRet),
+ NEONMAP1(vqshls_n_s32, aarch64_neon_vqshls_n, VectorRet),
+ NEONMAP1(vqshls_n_u32, aarch64_neon_vqshlu_n, VectorRet),
+ NEONMAP1(vqshls_s32, aarch64_neon_vqshls, VectorRet),
+ NEONMAP1(vqshls_u32, aarch64_neon_vqshlu, VectorRet),
+ NEONMAP1(vqshlub_n_s8, aarch64_neon_vsqshlu, VectorRet),
+ NEONMAP1(vqshlud_n_s64, aarch64_neon_vsqshlu, VectorRet),
+ NEONMAP1(vqshluh_n_s16, aarch64_neon_vsqshlu, VectorRet),
+ NEONMAP1(vqshlus_n_s32, aarch64_neon_vsqshlu, VectorRet),
+ NEONMAP1(vqshrnd_n_s64, aarch64_neon_vsqshrn, VectorRet),
+ NEONMAP1(vqshrnd_n_u64, aarch64_neon_vuqshrn, VectorRet),
+ NEONMAP1(vqshrnh_n_s16, aarch64_neon_vsqshrn, VectorRet),
+ NEONMAP1(vqshrnh_n_u16, aarch64_neon_vuqshrn, VectorRet),
+ NEONMAP1(vqshrns_n_s32, aarch64_neon_vsqshrn, VectorRet),
+ NEONMAP1(vqshrns_n_u32, aarch64_neon_vuqshrn, VectorRet),
+ NEONMAP1(vqshrund_n_s64, aarch64_neon_vsqshrun, VectorRet),
+ NEONMAP1(vqshrunh_n_s16, aarch64_neon_vsqshrun, VectorRet),
+ NEONMAP1(vqshruns_n_s32, aarch64_neon_vsqshrun, VectorRet),
+ NEONMAP1(vqsubb_s8, arm_neon_vqsubs, VectorRet),
+ NEONMAP1(vqsubb_u8, arm_neon_vqsubu, VectorRet),
+ NEONMAP1(vqsubd_s64, arm_neon_vqsubs, VectorRet),
+ NEONMAP1(vqsubd_u64, arm_neon_vqsubu, VectorRet),
+ NEONMAP1(vqsubh_s16, arm_neon_vqsubs, VectorRet),
+ NEONMAP1(vqsubh_u16, arm_neon_vqsubu, VectorRet),
+ NEONMAP1(vqsubs_s32, arm_neon_vqsubs, VectorRet),
+ NEONMAP1(vqsubs_u32, arm_neon_vqsubu, VectorRet),
+ NEONMAP1(vrecped_f64, aarch64_neon_vrecpe, AddRetType),
+ NEONMAP1(vrecpes_f32, aarch64_neon_vrecpe, AddRetType),
+ NEONMAP1(vrecpsd_f64, aarch64_neon_vrecps, AddRetType),
+ NEONMAP1(vrecpss_f32, aarch64_neon_vrecps, AddRetType),
+ NEONMAP1(vrecpxd_f64, aarch64_neon_vrecpx, AddRetType),
+ NEONMAP1(vrecpxs_f32, aarch64_neon_vrecpx, AddRetType),
+ NEONMAP1(vrshld_s64, aarch64_neon_vrshlds, 0),
+ NEONMAP1(vrshld_u64, aarch64_neon_vrshldu, 0),
+ NEONMAP1(vrshrd_n_s64, aarch64_neon_vsrshr, VectorRet),
+ NEONMAP1(vrshrd_n_u64, aarch64_neon_vurshr, VectorRet),
+ NEONMAP1(vrsqrted_f64, aarch64_neon_vrsqrte, AddRetType),
+ NEONMAP1(vrsqrtes_f32, aarch64_neon_vrsqrte, AddRetType),
+ NEONMAP1(vrsqrtsd_f64, aarch64_neon_vrsqrts, AddRetType),
+ NEONMAP1(vrsqrtss_f32, aarch64_neon_vrsqrts, AddRetType),
+ NEONMAP1(vrsrad_n_s64, aarch64_neon_vrsrads_n, 0),
+ NEONMAP1(vrsrad_n_u64, aarch64_neon_vrsradu_n, 0),
+ NEONMAP0(vset_lane_f32),
+ NEONMAP0(vset_lane_f64),
+ NEONMAP0(vset_lane_i16),
+ NEONMAP0(vset_lane_i32),
+ NEONMAP0(vset_lane_i64),
+ NEONMAP0(vset_lane_i8),
+ NEONMAP0(vsetq_lane_f32),
+ NEONMAP0(vsetq_lane_f64),
+ NEONMAP0(vsetq_lane_i16),
+ NEONMAP0(vsetq_lane_i32),
+ NEONMAP0(vsetq_lane_i64),
+ NEONMAP0(vsetq_lane_i8),
+ NEONMAP1(vsha1cq_u32, arm_neon_sha1c, 0),
+ NEONMAP1(vsha1h_u32, arm_neon_sha1h, 0),
+ NEONMAP1(vsha1mq_u32, arm_neon_sha1m, 0),
+ NEONMAP1(vsha1pq_u32, arm_neon_sha1p, 0),
+ NEONMAP1(vshld_n_s64, aarch64_neon_vshld_n, 0),
+ NEONMAP1(vshld_n_u64, aarch64_neon_vshld_n, 0),
+ NEONMAP1(vshld_s64, aarch64_neon_vshlds, 0),
+ NEONMAP1(vshld_u64, aarch64_neon_vshldu, 0),
+ NEONMAP1(vshrd_n_s64, aarch64_neon_vshrds_n, 0),
+ NEONMAP1(vshrd_n_u64, aarch64_neon_vshrdu_n, 0),
+ NEONMAP1(vslid_n_s64, aarch64_neon_vsli, VectorRet),
+ NEONMAP1(vslid_n_u64, aarch64_neon_vsli, VectorRet),
+ NEONMAP1(vsqaddb_u8, aarch64_neon_vsqadd, VectorRet),
+ NEONMAP1(vsqaddd_u64, aarch64_neon_vsqadd, VectorRet),
+ NEONMAP1(vsqaddh_u16, aarch64_neon_vsqadd, VectorRet),
+ NEONMAP1(vsqadds_u32, aarch64_neon_vsqadd, VectorRet),
+ NEONMAP1(vsrad_n_s64, aarch64_neon_vsrads_n, 0),
+ NEONMAP1(vsrad_n_u64, aarch64_neon_vsradu_n, 0),
+ NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, VectorRet),
+ NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, VectorRet),
+ NEONMAP1(vsubd_s64, aarch64_neon_vsubds, 0),
+ NEONMAP1(vsubd_u64, aarch64_neon_vsubdu, 0),
+ NEONMAP1(vtstd_s64, aarch64_neon_vtstd, VectorRetGetArgs01),
+ NEONMAP1(vtstd_u64, aarch64_neon_vtstd, VectorRetGetArgs01),
+ NEONMAP1(vuqaddb_s8, aarch64_neon_vuqadd, VectorRet),
+ NEONMAP1(vuqaddd_s64, aarch64_neon_vuqadd, VectorRet),
+ NEONMAP1(vuqaddh_s16, aarch64_neon_vuqadd, VectorRet),
+ NEONMAP1(vuqadds_s32, aarch64_neon_vuqadd, VectorRet)
+};
+
+static NeonIntrinsicInfo ARMSIMDIntrinsicMap[] = {
+ NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vabs_v, arm_neon_vabs, 0),
+ NEONMAP1(vabsq_v, arm_neon_vabs, 0),
+ NEONMAP0(vaddhn_v),
+ NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
+ NEONMAP1(vaeseq_v, arm_neon_aese, 0),
+ NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
+ NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
+ NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
+ NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
+ NEONMAP1(vcage_v, arm_neon_vacge, 0),
+ NEONMAP1(vcageq_v, arm_neon_vacge, 0),
+ NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcale_v, arm_neon_vacge, 0),
+ NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
+ NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
+ NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
+ NEONMAP1(vclz_v, ctlz, Add1ArgType),
+ NEONMAP1(vclzq_v, ctlz, Add1ArgType),
+ NEONMAP1(vcnt_v, ctpop, Add1ArgType),
+ NEONMAP1(vcntq_v, ctpop, Add1ArgType),
+ NEONMAP1(vcvt_f16_v, arm_neon_vcvtfp2hf, 0),
+ NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
+ NEONMAP0(vcvt_f32_v),
+ NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvt_s32_v),
+ NEONMAP0(vcvt_s64_v),
+ NEONMAP0(vcvt_u32_v),
+ NEONMAP0(vcvt_u64_v),
+ NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP0(vcvtq_f32_v),
+ NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_s32_v),
+ NEONMAP0(vcvtq_s64_v),
+ NEONMAP0(vcvtq_u32_v),
+ NEONMAP0(vcvtq_u64_v),
+ NEONMAP0(vext_v),
+ NEONMAP0(vextq_v),
+ NEONMAP0(vfma_v),
+ NEONMAP0(vfmaq_v),
+ NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vld1_dup_v),
+ NEONMAP1(vld1_v, arm_neon_vld1, 0),
+ NEONMAP0(vld1q_dup_v),
+ NEONMAP1(vld1q_v, arm_neon_vld1, 0),
+ NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
+ NEONMAP1(vld2_v, arm_neon_vld2, 0),
+ NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
+ NEONMAP1(vld2q_v, arm_neon_vld2, 0),
+ NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
+ NEONMAP1(vld3_v, arm_neon_vld3, 0),
+ NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
+ NEONMAP1(vld3q_v, arm_neon_vld3, 0),
+ NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
+ NEONMAP1(vld4_v, arm_neon_vld4, 0),
+ NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
+ NEONMAP1(vld4q_v, arm_neon_vld4, 0),
+ NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vmovl_v),
+ NEONMAP0(vmovn_v),
+ NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
+ NEONMAP0(vmull_v),
+ NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
+ NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
+ NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
+ NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
+ NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
+ NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
+ NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
+ NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
+ NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
+ NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
+ NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
+ NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
+ NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
+ NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
+ NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
+ NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
+ NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
+ NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
+ NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
+ NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
+ NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
+ NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
+ NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
+ NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
+ NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
+ NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
+ NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
+ NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
+ NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
+ NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
+ NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
+ NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
+ NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
+ NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
+ NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
+ NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
+ NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
+ NEONMAP0(vshl_n_v),
+ NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshll_n_v),
+ NEONMAP0(vshlq_n_v),
+ NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshr_n_v),
+ NEONMAP0(vshrn_n_v),
+ NEONMAP0(vshrq_n_v),
+ NEONMAP1(vst1_v, arm_neon_vst1, 0),
+ NEONMAP1(vst1q_v, arm_neon_vst1, 0),
+ NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
+ NEONMAP1(vst2_v, arm_neon_vst2, 0),
+ NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
+ NEONMAP1(vst2q_v, arm_neon_vst2, 0),
+ NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
+ NEONMAP1(vst3_v, arm_neon_vst3, 0),
+ NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
+ NEONMAP1(vst3q_v, arm_neon_vst3, 0),
+ NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
+ NEONMAP1(vst4_v, arm_neon_vst4, 0),
+ NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
+ NEONMAP1(vst4q_v, arm_neon_vst4, 0),
+ NEONMAP0(vsubhn_v),
+ NEONMAP0(vtrn_v),
+ NEONMAP0(vtrnq_v),
+ NEONMAP0(vtst_v),
+ NEONMAP0(vtstq_v),
+ NEONMAP0(vuzp_v),
+ NEONMAP0(vuzpq_v),
+ NEONMAP0(vzip_v),
+ NEONMAP0(vzipq_v)
+};
+
+static NeonIntrinsicInfo ARM64SIMDIntrinsicMap[] = {
+ NEONMAP1(vabs_v, arm64_neon_abs, 0),
+ NEONMAP1(vabsq_v, arm64_neon_abs, 0),
+ NEONMAP0(vaddhn_v),
+ NEONMAP1(vaesdq_v, arm64_crypto_aesd, 0),
+ NEONMAP1(vaeseq_v, arm64_crypto_aese, 0),
+ NEONMAP1(vaesimcq_v, arm64_crypto_aesimc, 0),
+ NEONMAP1(vaesmcq_v, arm64_crypto_aesmc, 0),
+ NEONMAP1(vcage_v, arm64_neon_facge, 0),
+ NEONMAP1(vcageq_v, arm64_neon_facge, 0),
+ NEONMAP1(vcagt_v, arm64_neon_facgt, 0),
+ NEONMAP1(vcagtq_v, arm64_neon_facgt, 0),
+ NEONMAP1(vcale_v, arm64_neon_facge, 0),
+ NEONMAP1(vcaleq_v, arm64_neon_facge, 0),
+ NEONMAP1(vcalt_v, arm64_neon_facgt, 0),
+ NEONMAP1(vcaltq_v, arm64_neon_facgt, 0),
+ NEONMAP1(vcls_v, arm64_neon_cls, Add1ArgType),
+ NEONMAP1(vclsq_v, arm64_neon_cls, Add1ArgType),
+ NEONMAP1(vclz_v, ctlz, Add1ArgType),
+ NEONMAP1(vclzq_v, ctlz, Add1ArgType),
+ NEONMAP1(vcnt_v, ctpop, Add1ArgType),
+ NEONMAP1(vcntq_v, ctpop, Add1ArgType),
+ NEONMAP1(vcvt_f16_v, arm64_neon_vcvtfp2hf, 0),
+ NEONMAP1(vcvt_f32_f16, arm64_neon_vcvthf2fp, 0),
+ NEONMAP0(vcvt_f32_v),
+ NEONMAP2(vcvt_n_f32_v, arm64_neon_vcvtfxu2fp, arm64_neon_vcvtfxs2fp, 0),
+ NEONMAP2(vcvt_n_f64_v, arm64_neon_vcvtfxu2fp, arm64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s32_v, arm64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s64_v, arm64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u32_v, arm64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u64_v, arm64_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_f32_v),
+ NEONMAP2(vcvtq_n_f32_v, arm64_neon_vcvtfxu2fp, arm64_neon_vcvtfxs2fp, 0),
+ NEONMAP2(vcvtq_n_f64_v, arm64_neon_vcvtfxu2fp, arm64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_s32_v, arm64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s64_v, arm64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u32_v, arm64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u64_v, arm64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtx_f32_v, arm64_neon_fcvtxn, AddRetType | Add1ArgType),
+ NEONMAP0(vext_v),
+ NEONMAP0(vextq_v),
+ NEONMAP0(vfma_v),
+ NEONMAP0(vfmaq_v),
+ NEONMAP2(vhadd_v, arm64_neon_uhadd, arm64_neon_shadd, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhaddq_v, arm64_neon_uhadd, arm64_neon_shadd, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsub_v, arm64_neon_uhsub, arm64_neon_shsub, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsubq_v, arm64_neon_uhsub, arm64_neon_shsub, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vmovl_v),
+ NEONMAP0(vmovn_v),
+ NEONMAP1(vmul_v, arm64_neon_pmul, Add1ArgType),
+ NEONMAP1(vmulq_v, arm64_neon_pmul, Add1ArgType),
+ NEONMAP1(vpadd_v, arm64_neon_addp, Add1ArgType),
+ NEONMAP2(vpaddl_v, arm64_neon_uaddlp, arm64_neon_saddlp, UnsignedAlts),
+ NEONMAP2(vpaddlq_v, arm64_neon_uaddlp, arm64_neon_saddlp, UnsignedAlts),
+ NEONMAP1(vpaddq_v, arm64_neon_addp, Add1ArgType),
+ NEONMAP1(vqabs_v, arm64_neon_sqabs, Add1ArgType),
+ NEONMAP1(vqabsq_v, arm64_neon_sqabs, Add1ArgType),
+ NEONMAP2(vqadd_v, arm64_neon_uqadd, arm64_neon_sqadd, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqaddq_v, arm64_neon_uqadd, arm64_neon_sqadd, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqdmlal_v, arm64_neon_sqdmull, arm64_neon_sqadd, 0),
+ NEONMAP2(vqdmlsl_v, arm64_neon_sqdmull, arm64_neon_sqsub, 0),
+ NEONMAP1(vqdmulh_v, arm64_neon_sqdmulh, Add1ArgType),
+ NEONMAP1(vqdmulhq_v, arm64_neon_sqdmulh, Add1ArgType),
+ NEONMAP1(vqdmull_v, arm64_neon_sqdmull, Add1ArgType),
+ NEONMAP2(vqmovn_v, arm64_neon_uqxtn, arm64_neon_sqxtn, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqmovun_v, arm64_neon_sqxtun, Add1ArgType),
+ NEONMAP1(vqneg_v, arm64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqnegq_v, arm64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqrdmulh_v, arm64_neon_sqrdmulh, Add1ArgType),
+ NEONMAP1(vqrdmulhq_v, arm64_neon_sqrdmulh, Add1ArgType),
+ NEONMAP2(vqrshl_v, arm64_neon_uqrshl, arm64_neon_sqrshl, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqrshlq_v, arm64_neon_uqrshl, arm64_neon_sqrshl, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshl_n_v, arm64_neon_uqshl, arm64_neon_sqshl, UnsignedAlts),
+ NEONMAP2(vqshl_v, arm64_neon_uqshl, arm64_neon_sqshl, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vqshlq_n_v, arm64_neon_uqshl, arm64_neon_sqshl, UnsignedAlts),
+ NEONMAP2(vqshlq_v, arm64_neon_uqshl, arm64_neon_sqshl, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsub_v, arm64_neon_uqsub, arm64_neon_sqsub, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsubq_v, arm64_neon_uqsub, arm64_neon_sqsub, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vraddhn_v, arm64_neon_raddhn, Add1ArgType),
+ NEONMAP2(vrecpe_v, arm64_neon_frecpe, arm64_neon_urecpe, 0),
+ NEONMAP2(vrecpeq_v, arm64_neon_frecpe, arm64_neon_urecpe, 0),
+ NEONMAP1(vrecps_v, arm64_neon_frecps, Add1ArgType),
+ NEONMAP1(vrecpsq_v, arm64_neon_frecps, Add1ArgType),
+ NEONMAP2(vrhadd_v, arm64_neon_urhadd, arm64_neon_srhadd, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrhaddq_v, arm64_neon_urhadd, arm64_neon_srhadd, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshl_v, arm64_neon_urshl, arm64_neon_srshl, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshlq_v, arm64_neon_urshl, arm64_neon_srshl, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrsqrte_v, arm64_neon_frsqrte, arm64_neon_ursqrte, 0),
+ NEONMAP2(vrsqrteq_v, arm64_neon_frsqrte, arm64_neon_ursqrte, 0),
+ NEONMAP1(vrsqrts_v, arm64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vrsqrtsq_v, arm64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vrsubhn_v, arm64_neon_rsubhn, Add1ArgType),
+ NEONMAP1(vsha1su0q_v, arm64_crypto_sha1su0, 0),
+ NEONMAP1(vsha1su1q_v, arm64_crypto_sha1su1, 0),
+ NEONMAP1(vsha256h2q_v, arm64_crypto_sha256h2, 0),
+ NEONMAP1(vsha256hq_v, arm64_crypto_sha256h, 0),
+ NEONMAP1(vsha256su0q_v, arm64_crypto_sha256su0, 0),
+ NEONMAP1(vsha256su1q_v, arm64_crypto_sha256su1, 0),
+ NEONMAP0(vshl_n_v),
+ NEONMAP2(vshl_v, arm64_neon_ushl, arm64_neon_sshl, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshll_n_v),
+ NEONMAP0(vshlq_n_v),
+ NEONMAP2(vshlq_v, arm64_neon_ushl, arm64_neon_sshl, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshr_n_v),
+ NEONMAP0(vshrn_n_v),
+ NEONMAP0(vshrq_n_v),
+ NEONMAP0(vsubhn_v),
+ NEONMAP0(vtst_v),
+ NEONMAP0(vtstq_v),
+};
+
+static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
+ NEONMAP1(vabdd_f64, arm64_sisd_fabd, Add1ArgType),
+ NEONMAP1(vabds_f32, arm64_sisd_fabd, Add1ArgType),
+ NEONMAP1(vabsd_s64, arm64_neon_abs, Add1ArgType),
+ NEONMAP1(vaddlv_s32, arm64_neon_saddlv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddlv_u32, arm64_neon_uaddlv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddlvq_s32, arm64_neon_saddlv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddlvq_u32, arm64_neon_uaddlv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_f32, arm64_neon_faddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_s32, arm64_neon_saddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_u32, arm64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_f32, arm64_neon_faddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_f64, arm64_neon_faddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_s32, arm64_neon_saddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_s64, arm64_neon_saddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_u32, arm64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_u64, arm64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vcaged_f64, arm64_neon_facge, AddRetType | Add1ArgType),
+ NEONMAP1(vcages_f32, arm64_neon_facge, AddRetType | Add1ArgType),
+ NEONMAP1(vcagtd_f64, arm64_neon_facgt, AddRetType | Add1ArgType),
+ NEONMAP1(vcagts_f32, arm64_neon_facgt, AddRetType | Add1ArgType),
+ NEONMAP1(vcaled_f64, arm64_neon_facge, AddRetType | Add1ArgType),
+ NEONMAP1(vcales_f32, arm64_neon_facge, AddRetType | Add1ArgType),
+ NEONMAP1(vcaltd_f64, arm64_neon_facgt, AddRetType | Add1ArgType),
+ NEONMAP1(vcalts_f32, arm64_neon_facgt, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtad_s64_f64, arm64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtad_u64_f64, arm64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtas_s32_f32, arm64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtas_u32_f32, arm64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_n_f64_s64, arm64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_n_f64_u64, arm64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_n_s64_f64, arm64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_n_u64_f64, arm64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmd_s64_f64, arm64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmd_u64_f64, arm64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtms_s32_f32, arm64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtms_u32_f32, arm64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnd_s64_f64, arm64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnd_u64_f64, arm64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtns_s32_f32, arm64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtns_u32_f32, arm64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtpd_s64_f64, arm64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtpd_u64_f64, arm64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtps_s32_f32, arm64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtps_u32_f32, arm64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_n_f32_s32, arm64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_n_f32_u32, arm64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_n_s32_f32, arm64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_n_u32_f32, arm64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtxd_f32_f64, arm64_sisd_fcvtxn, 0),
+ NEONMAP1(vmaxnmv_f32, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxnmvq_f32, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxnmvq_f64, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_f32, arm64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_s32, arm64_neon_smaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_u32, arm64_neon_umaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_f32, arm64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_f64, arm64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_s32, arm64_neon_smaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_u32, arm64_neon_umaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vminnmv_f32, arm64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vminnmvq_f32, arm64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vminnmvq_f64, arm64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_f32, arm64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_s32, arm64_neon_sminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_u32, arm64_neon_uminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_f32, arm64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_f64, arm64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_s32, arm64_neon_sminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_u32, arm64_neon_uminv, AddRetType | Add1ArgType),
+ NEONMAP1(vmull_p64, arm64_neon_pmull64, 0),
+ NEONMAP1(vmulxd_f64, arm64_neon_fmulx, Add1ArgType),
+ NEONMAP1(vmulxs_f32, arm64_neon_fmulx, Add1ArgType),
+ NEONMAP1(vpaddd_s64, arm64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vpaddd_u64, arm64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxnmqd_f64, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxnms_f32, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxqd_f64, arm64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxs_f32, arm64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vpminnmqd_f64, arm64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vpminnms_f32, arm64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vpminqd_f64, arm64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vpmins_f32, arm64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vqabsb_s8, arm64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqabsd_s64, arm64_neon_sqabs, Add1ArgType),
+ NEONMAP1(vqabsh_s16, arm64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqabss_s32, arm64_neon_sqabs, Add1ArgType),
+ NEONMAP1(vqaddb_s8, arm64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqaddb_u8, arm64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqaddd_s64, arm64_neon_sqadd, Add1ArgType),
+ NEONMAP1(vqaddd_u64, arm64_neon_uqadd, Add1ArgType),
+ NEONMAP1(vqaddh_s16, arm64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqaddh_u16, arm64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqadds_s32, arm64_neon_sqadd, Add1ArgType),
+ NEONMAP1(vqadds_u32, arm64_neon_uqadd, Add1ArgType),
+ NEONMAP1(vqdmulhh_s16, arm64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqdmulhs_s32, arm64_neon_sqdmulh, Add1ArgType),
+ NEONMAP1(vqdmullh_s16, arm64_neon_sqdmull, VectorRet | Use128BitVectors),
+ NEONMAP1(vqdmulls_s32, arm64_neon_sqdmulls_scalar, 0),
+ NEONMAP1(vqmovnd_s64, arm64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
+ NEONMAP1(vqmovnd_u64, arm64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
+ NEONMAP1(vqmovnh_s16, arm64_neon_sqxtn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovnh_u16, arm64_neon_uqxtn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovns_s32, arm64_neon_sqxtn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovns_u32, arm64_neon_uqxtn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovund_s64, arm64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
+ NEONMAP1(vqmovunh_s16, arm64_neon_sqxtun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovuns_s32, arm64_neon_sqxtun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqnegb_s8, arm64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqnegd_s64, arm64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqnegh_s16, arm64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqnegs_s32, arm64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqrdmulhh_s16, arm64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrdmulhs_s32, arm64_neon_sqrdmulh, Add1ArgType),
+ NEONMAP1(vqrshlb_s8, arm64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrshlb_u8, arm64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrshld_s64, arm64_neon_sqrshl, Add1ArgType),
+ NEONMAP1(vqrshld_u64, arm64_neon_uqrshl, Add1ArgType),
+ NEONMAP1(vqrshlh_s16, arm64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrshlh_u16, arm64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrshls_s32, arm64_neon_sqrshl, Add1ArgType),
+ NEONMAP1(vqrshls_u32, arm64_neon_uqrshl, Add1ArgType),
+ NEONMAP1(vqrshrnd_n_s64, arm64_neon_sqrshrn, AddRetType),
+ NEONMAP1(vqrshrnd_n_u64, arm64_neon_uqrshrn, AddRetType),
+ NEONMAP1(vqrshrnh_n_s16, arm64_neon_sqrshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshrnh_n_u16, arm64_neon_uqrshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshrns_n_s32, arm64_neon_sqrshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshrns_n_u32, arm64_neon_uqrshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshrund_n_s64, arm64_neon_sqrshrun, AddRetType),
+ NEONMAP1(vqrshrunh_n_s16, arm64_neon_sqrshrun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshruns_n_s32, arm64_neon_sqrshrun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshlb_n_s8, arm64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlb_n_u8, arm64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlb_s8, arm64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlb_u8, arm64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshld_s64, arm64_neon_sqshl, Add1ArgType),
+ NEONMAP1(vqshld_u64, arm64_neon_uqshl, Add1ArgType),
+ NEONMAP1(vqshlh_n_s16, arm64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlh_n_u16, arm64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlh_s16, arm64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlh_u16, arm64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshls_n_s32, arm64_neon_sqshl, Add1ArgType),
+ NEONMAP1(vqshls_n_u32, arm64_neon_uqshl, Add1ArgType),
+ NEONMAP1(vqshls_s32, arm64_neon_sqshl, Add1ArgType),
+ NEONMAP1(vqshls_u32, arm64_neon_uqshl, Add1ArgType),
+ NEONMAP1(vqshlub_n_s8, arm64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshluh_n_s16, arm64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlus_n_s32, arm64_neon_sqshlu, Add1ArgType),
+ NEONMAP1(vqshrnd_n_s64, arm64_neon_sqshrn, AddRetType),
+ NEONMAP1(vqshrnd_n_u64, arm64_neon_uqshrn, AddRetType),
+ NEONMAP1(vqshrnh_n_s16, arm64_neon_sqshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshrnh_n_u16, arm64_neon_uqshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshrns_n_s32, arm64_neon_sqshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshrns_n_u32, arm64_neon_uqshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshrund_n_s64, arm64_neon_sqshrun, AddRetType),
+ NEONMAP1(vqshrunh_n_s16, arm64_neon_sqshrun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshruns_n_s32, arm64_neon_sqshrun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqsubb_s8, arm64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqsubb_u8, arm64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqsubd_s64, arm64_neon_sqsub, Add1ArgType),
+ NEONMAP1(vqsubd_u64, arm64_neon_uqsub, Add1ArgType),
+ NEONMAP1(vqsubh_s16, arm64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqsubh_u16, arm64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqsubs_s32, arm64_neon_sqsub, Add1ArgType),
+ NEONMAP1(vqsubs_u32, arm64_neon_uqsub, Add1ArgType),
+ NEONMAP1(vrecped_f64, arm64_neon_frecpe, Add1ArgType),
+ NEONMAP1(vrecpes_f32, arm64_neon_frecpe, Add1ArgType),
+ NEONMAP1(vrecpxd_f64, arm64_neon_frecpx, Add1ArgType),
+ NEONMAP1(vrecpxs_f32, arm64_neon_frecpx, Add1ArgType),
+ NEONMAP1(vrshld_s64, arm64_neon_srshl, Add1ArgType),
+ NEONMAP1(vrshld_u64, arm64_neon_urshl, Add1ArgType),
+ NEONMAP1(vrsqrted_f64, arm64_neon_frsqrte, Add1ArgType),
+ NEONMAP1(vrsqrtes_f32, arm64_neon_frsqrte, Add1ArgType),
+ NEONMAP1(vrsqrtsd_f64, arm64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vrsqrtss_f32, arm64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vsha1cq_u32, arm64_crypto_sha1c, 0),
+ NEONMAP1(vsha1h_u32, arm64_crypto_sha1h, 0),
+ NEONMAP1(vsha1mq_u32, arm64_crypto_sha1m, 0),
+ NEONMAP1(vsha1pq_u32, arm64_crypto_sha1p, 0),
+ NEONMAP1(vshld_s64, arm64_neon_sshl, Add1ArgType),
+ NEONMAP1(vshld_u64, arm64_neon_ushl, Add1ArgType),
+ NEONMAP1(vslid_n_s64, arm64_neon_vsli, Vectorize1ArgType),
+ NEONMAP1(vslid_n_u64, arm64_neon_vsli, Vectorize1ArgType),
+ NEONMAP1(vsqaddb_u8, arm64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vsqaddd_u64, arm64_neon_usqadd, Add1ArgType),
+ NEONMAP1(vsqaddh_u16, arm64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vsqadds_u32, arm64_neon_usqadd, Add1ArgType),
+ NEONMAP1(vsrid_n_s64, arm64_neon_vsri, Vectorize1ArgType),
+ NEONMAP1(vsrid_n_u64, arm64_neon_vsri, Vectorize1ArgType),
+ NEONMAP1(vuqaddb_s8, arm64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vuqaddd_s64, arm64_neon_suqadd, Add1ArgType),
+ NEONMAP1(vuqaddh_s16, arm64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vuqadds_s32, arm64_neon_suqadd, Add1ArgType),
+};
+
+#undef NEONMAP0
+#undef NEONMAP1
+#undef NEONMAP2
+
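
The NEONMAPn macros are defined (and the NeonIntrinsicInfo struct declared) earlier in this patch; only their uses and #undefs appear here. Judging from the fields read further down (BuiltinID, LLVMIntrinsic, NameHint, TypeModifier), each entry is presumably an aggregate along these lines; the AltLLVMIntrinsic slot for the NEONMAP2 form is an assumption, not taken from the patch:

// Hypothetical sketch of the table entry shape; the real definitions live
// earlier in this patch. Field names follow the accesses made below.
struct NeonIntrinsicInfoSketch {
  unsigned BuiltinID;        // NEON::BI__builtin_neon_* key; tables are sorted on it
  unsigned LLVMIntrinsic;    // primary Intrinsic:: ID (0 for NEONMAP0 entries)
  unsigned AltLLVMIntrinsic; // assumed second slot used by NEONMAP2 entries
  const char *NameHint;      // instruction name hint (the "s" passed to EmitNeonCall)
  unsigned TypeModifier;     // AddRetType | Add1ArgType | VectorRet | ...
};

// A NEONMAP1-style entry such as NEONMAP1(vabsq_v, arm_neon_vabs, 0) would then
// expand to roughly:
//   { NEON::BI__builtin_neon_vabsq_v, Intrinsic::arm_neon_vabs, 0, "vabsq_v", 0 }
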
+static bool NEONSIMDIntrinsicsProvenSorted = false;
+static bool AArch64SISDIntrinsicInfoProvenSorted = false;
+
+static bool ARM64SIMDIntrinsicsProvenSorted = false;
+static bool ARM64SISDIntrinsicsProvenSorted = false;
+
+static const NeonIntrinsicInfo *
+findNeonIntrinsicInMap(llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
+ unsigned BuiltinID, bool &MapProvenSorted) {
+
+#ifndef NDEBUG
+ if (!MapProvenSorted) {
+ // FIXME: use std::is_sorted once C++11 is allowed
+ for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i)
+ assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID);
+ MapProvenSorted = true;
+ }
+#endif
+
+ const NeonIntrinsicInfo *Builtin =
+ std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
+
+ if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
+ return Builtin;
+
+ return 0;
+}
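
The lookup above depends on each table being sorted by BuiltinID so that std::lower_bound can binary-search it; the NDEBUG-only loop asserts exactly that, once per table. A minimal standalone sketch of the same pattern, using a hypothetical Record type and a free operator< against the bare key (the real NeonIntrinsicInfo presumably provides an equivalent ordering):

#include <algorithm>
#include <cassert>
#include <cstdio>

// Hypothetical stand-in for NeonIntrinsicInfo, keyed by an unsigned ID.
struct Record {
  unsigned ID;
  const char *Name;
};

// Ordering against a bare key so std::lower_bound can compare Record < unsigned.
static bool operator<(const Record &R, unsigned Key) { return R.ID < Key; }

// Binary-search a table that is sorted by ID; return 0 when the ID is absent.
static const Record *findRecord(const Record *Begin, const Record *End,
                                unsigned Key) {
  const Record *I = std::lower_bound(Begin, End, Key);
  if (I != End && I->ID == Key)
    return I;
  return 0;
}

int main() {
  static const Record Table[] = { {1, "vabs"}, {4, "vadd"}, {9, "vmul"} };
  const Record *R = findRecord(Table, Table + 3, 4);
  assert(R && R->ID == 4);
  std::printf("found %s\n", R->Name);
  return 0;
}
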
+
+Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
+ unsigned Modifier,
+ llvm::Type *ArgType,
+ const CallExpr *E) {
+ int VectorSize = 0;
+ if (Modifier & Use64BitVectors)
+ VectorSize = 64;
+ else if (Modifier & Use128BitVectors)
+ VectorSize = 128;
+
+ // Return type.
+ SmallVector<llvm::Type *, 3> Tys;
+ if (Modifier & AddRetType) {
+ llvm::Type *Ty = ConvertType(E->getCallReturnType());
+ if (Modifier & VectorizeRetType)
+ Ty = llvm::VectorType::get(
+ Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
+
+ Tys.push_back(Ty);
+ }
+
+ // Arguments.
+ if (Modifier & VectorizeArgTypes) {
+ int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
+ ArgType = llvm::VectorType::get(ArgType, Elts);
+ }
+
+ if (Modifier & (Add1ArgType | Add2ArgTypes))
+ Tys.push_back(ArgType);
+
+ if (Modifier & Add2ArgTypes)
+ Tys.push_back(ArgType);
+
+ if (Modifier & InventFloatType)
+ Tys.push_back(FloatTy);
+
+ return CGM.getIntrinsic(IntrinsicID, Tys);
+}
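
LookupNeonLLVMIntrinsic turns the table's TypeModifier bits into the list of overload types handed to CGM.getIntrinsic: AddRetType contributes the call's return type (vectorized when the modifier also requests it), Add1ArgType/Add2ArgTypes contribute the argument type once or twice, and InventFloatType appends a float type. A small sketch of the same flag-driven assembly, with hypothetical flag names and std::string stand-ins for llvm::Type:

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical modifier bits mirroring the kind of flags used above.
enum {
  kAddRetType      = 1 << 0,
  kAdd1ArgType     = 1 << 1,
  kAdd2ArgTypes    = 1 << 2,
  kInventFloatType = 1 << 3
};

// Assemble the overload-type list from a modifier mask (types as strings here).
static std::vector<std::string> buildOverloadTypes(unsigned Modifier,
                                                   const std::string &RetTy,
                                                   const std::string &ArgTy) {
  std::vector<std::string> Tys;
  if (Modifier & kAddRetType)
    Tys.push_back(RetTy);
  if (Modifier & (kAdd1ArgType | kAdd2ArgTypes))
    Tys.push_back(ArgTy);
  if (Modifier & kAdd2ArgTypes)
    Tys.push_back(ArgTy);            // second copy of the argument type
  if (Modifier & kInventFloatType)
    Tys.push_back("float");
  return Tys;
}

int main() {
  std::vector<std::string> Tys =
      buildOverloadTypes(kAddRetType | kAdd1ArgType, "i64", "v2i32");
  for (unsigned i = 0; i != Tys.size(); ++i)
    std::printf("%s\n", Tys[i].c_str());   // prints: i64, v2i32
  return 0;
}
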
+
+static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
+ const NeonIntrinsicInfo &SISDInfo,
+ SmallVectorImpl<Value *> &Ops,
+ const CallExpr *E) {
+ unsigned BuiltinID = SISDInfo.BuiltinID;
+ unsigned int Int = SISDInfo.LLVMIntrinsic;
+ unsigned Modifier = SISDInfo.TypeModifier;
+ const char *s = SISDInfo.NameHint;
+
+ switch (BuiltinID) {
+ case NEON::BI__builtin_neon_vcled_s64:
+ case NEON::BI__builtin_neon_vcled_u64:
+ case NEON::BI__builtin_neon_vcles_f32:
+ case NEON::BI__builtin_neon_vcled_f64:
+ case NEON::BI__builtin_neon_vcltd_s64:
+ case NEON::BI__builtin_neon_vcltd_u64:
+ case NEON::BI__builtin_neon_vclts_f32:
+ case NEON::BI__builtin_neon_vcltd_f64:
+ case NEON::BI__builtin_neon_vcales_f32:
+ case NEON::BI__builtin_neon_vcaled_f64:
+ case NEON::BI__builtin_neon_vcalts_f32:
+ case NEON::BI__builtin_neon_vcaltd_f64:
+    // Only one direction of comparisons actually exists: cmle is a cmge with
+    // swapped operands. The table gives us the right intrinsic, but we still
+    // need to do the swap.
+ std::swap(Ops[0], Ops[1]);
+ break;
+ }
+
+ assert(Int && "Generic code assumes a valid intrinsic");
+
+ // Determine the type(s) of this overloaded AArch64 intrinsic.
+ const Expr *Arg = E->getArg(0);
+ llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
+ Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
+
+ int j = 0;
+ ConstantInt *C0 = ConstantInt::get(CGF.Int32Ty, 0);
+ for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
+ ai != ae; ++ai, ++j) {
+ llvm::Type *ArgTy = ai->getType();
+ if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
+ ArgTy->getPrimitiveSizeInBits())
+ continue;
+
+ assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
+ // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
+ // it before inserting.
+ Ops[j] =
+ CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
+ Ops[j] =
+ CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
+ }
+
+ Value *Result = CGF.EmitNeonCall(F, Ops, s);
+ llvm::Type *ResultType = CGF.ConvertType(E->getType());
+ if (ResultType->getPrimitiveSizeInBits() <
+ Result->getType()->getPrimitiveSizeInBits())
+ return CGF.Builder.CreateExtractElement(Result, C0);
+
+ return CGF.Builder.CreateBitCast(Result, ResultType, s);
+}
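
Two tricks in EmitCommonNeonSISDBuiltinExpr are worth noting: the "less-than" comparison family is lowered through the "greater-than" intrinsics by swapping operands, and scalar operands are widened to one-element vectors (insertelement at lane 0) before the call, with scalar results pulled back out via extractelement. A toy illustration of the first trick with plain integers (the names here are invented for the sketch):

#include <cassert>

// Only a greater-or-equal primitive is assumed to exist; less-or-equal is
// obtained by swapping the operands, mirroring the cmle -> cmge mapping above.
static bool cmge(int A, int B) { return A >= B; }
static bool cmle(int A, int B) { return cmge(B, A); } // swapped operands

int main() {
  assert(cmle(2, 5));
  assert(!cmle(7, 5));
  assert(cmle(5, 5));
  return 0;
}
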
+
static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
- unsigned BuiltinID,
+ const NeonIntrinsicInfo &SISDInfo,
const CallExpr *E) {
- unsigned int Int = 0;
- // Scalar result generated across vectors
- bool AcrossVec = false;
- // Extend element of one-element vector
- bool ExtendEle = false;
- bool OverloadInt = false;
- bool OverloadCmpInt = false;
- bool IsFpCmpZInt = false;
- bool OverloadCvtInt = false;
- bool OverloadWideInt = false;
- bool OverloadNarrowInt = false;
- const char *s = NULL;
+ unsigned BuiltinID = SISDInfo.BuiltinID;
+ unsigned int Int = SISDInfo.LLVMIntrinsic;
+ const char *s = SISDInfo.NameHint;
SmallVector<Value *, 4> Ops;
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
@@ -1774,20 +2884,20 @@
// argument that specifies the vector type, need to handle each case.
switch (BuiltinID) {
default: break;
- case AArch64::BI__builtin_neon_vdups_lane_f32:
- case AArch64::BI__builtin_neon_vdupd_lane_f64:
- case AArch64::BI__builtin_neon_vdups_laneq_f32:
- case AArch64::BI__builtin_neon_vdupd_laneq_f64: {
+ case NEON::BI__builtin_neon_vdups_lane_f32:
+ case NEON::BI__builtin_neon_vdupd_lane_f64:
+ case NEON::BI__builtin_neon_vdups_laneq_f32:
+ case NEON::BI__builtin_neon_vdupd_laneq_f64: {
return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane");
}
- case AArch64::BI__builtin_neon_vdupb_lane_i8:
- case AArch64::BI__builtin_neon_vduph_lane_i16:
- case AArch64::BI__builtin_neon_vdups_lane_i32:
- case AArch64::BI__builtin_neon_vdupd_lane_i64:
- case AArch64::BI__builtin_neon_vdupb_laneq_i8:
- case AArch64::BI__builtin_neon_vduph_laneq_i16:
- case AArch64::BI__builtin_neon_vdups_laneq_i32:
- case AArch64::BI__builtin_neon_vdupd_laneq_i64: {
+ case NEON::BI__builtin_neon_vdupb_lane_i8:
+ case NEON::BI__builtin_neon_vduph_lane_i16:
+ case NEON::BI__builtin_neon_vdups_lane_i32:
+ case NEON::BI__builtin_neon_vdupd_lane_i64:
+ case NEON::BI__builtin_neon_vdupb_laneq_i8:
+ case NEON::BI__builtin_neon_vduph_laneq_i16:
+ case NEON::BI__builtin_neon_vdups_laneq_i32:
+ case NEON::BI__builtin_neon_vdupd_laneq_i64: {
// The backend treats Neon scalar types as v1ix types
// So we want to dup lane from any vector to v1ix vector
// with shufflevector
@@ -1799,19 +2909,19 @@
// scalar type expected by the builtin
return CGF.Builder.CreateBitCast(Result, Ty, s);
}
- case AArch64::BI__builtin_neon_vqdmlalh_lane_s16 :
- case AArch64::BI__builtin_neon_vqdmlalh_laneq_s16 :
- case AArch64::BI__builtin_neon_vqdmlals_lane_s32 :
- case AArch64::BI__builtin_neon_vqdmlals_laneq_s32 :
- case AArch64::BI__builtin_neon_vqdmlslh_lane_s16 :
- case AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 :
- case AArch64::BI__builtin_neon_vqdmlsls_lane_s32 :
- case AArch64::BI__builtin_neon_vqdmlsls_laneq_s32 : {
+ case NEON::BI__builtin_neon_vqdmlalh_lane_s16 :
+ case NEON::BI__builtin_neon_vqdmlalh_laneq_s16 :
+ case NEON::BI__builtin_neon_vqdmlals_lane_s32 :
+ case NEON::BI__builtin_neon_vqdmlals_laneq_s32 :
+ case NEON::BI__builtin_neon_vqdmlslh_lane_s16 :
+ case NEON::BI__builtin_neon_vqdmlslh_laneq_s16 :
+ case NEON::BI__builtin_neon_vqdmlsls_lane_s32 :
+ case NEON::BI__builtin_neon_vqdmlsls_laneq_s32 : {
Int = Intrinsic::arm_neon_vqadds;
- if (BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_lane_s16 ||
- BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 ||
- BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_lane_s32 ||
- BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_laneq_s32) {
+ if (BuiltinID == NEON::BI__builtin_neon_vqdmlslh_lane_s16 ||
+ BuiltinID == NEON::BI__builtin_neon_vqdmlslh_laneq_s16 ||
+ BuiltinID == NEON::BI__builtin_neon_vqdmlsls_lane_s32 ||
+ BuiltinID == NEON::BI__builtin_neon_vqdmlsls_laneq_s32) {
Int = Intrinsic::arm_neon_vqsubs;
}
// create vqdmull call with b * c[i]
@@ -1839,23 +2949,23 @@
Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]);
return CGF.Builder.CreateBitCast(AddRes, Ty);
}
- case AArch64::BI__builtin_neon_vfmas_lane_f32:
- case AArch64::BI__builtin_neon_vfmas_laneq_f32:
- case AArch64::BI__builtin_neon_vfmad_lane_f64:
- case AArch64::BI__builtin_neon_vfmad_laneq_f64: {
+ case NEON::BI__builtin_neon_vfmas_lane_f32:
+ case NEON::BI__builtin_neon_vfmas_laneq_f32:
+ case NEON::BI__builtin_neon_vfmad_lane_f64:
+ case NEON::BI__builtin_neon_vfmad_laneq_f64: {
llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
}
// Scalar Floating-point Multiply Extended
- case AArch64::BI__builtin_neon_vmulxs_f32:
- case AArch64::BI__builtin_neon_vmulxd_f64: {
+ case NEON::BI__builtin_neon_vmulxs_f32:
+ case NEON::BI__builtin_neon_vmulxd_f64: {
Int = Intrinsic::aarch64_neon_vmulx;
llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
}
- case AArch64::BI__builtin_neon_vmul_n_f64: {
+ case NEON::BI__builtin_neon_vmul_n_f64: {
// v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane
llvm::Type *VTy = GetNeonType(&CGF,
NeonTypeFlags(NeonTypeFlags::Float64, false, false));
@@ -1865,788 +2975,523 @@
Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]);
return CGF.Builder.CreateBitCast(Result, VTy);
}
- case AArch64::BI__builtin_neon_vget_lane_i8:
- case AArch64::BI__builtin_neon_vget_lane_i16:
- case AArch64::BI__builtin_neon_vget_lane_i32:
- case AArch64::BI__builtin_neon_vget_lane_i64:
- case AArch64::BI__builtin_neon_vget_lane_f32:
- case AArch64::BI__builtin_neon_vget_lane_f64:
- case AArch64::BI__builtin_neon_vgetq_lane_i8:
- case AArch64::BI__builtin_neon_vgetq_lane_i16:
- case AArch64::BI__builtin_neon_vgetq_lane_i32:
- case AArch64::BI__builtin_neon_vgetq_lane_i64:
- case AArch64::BI__builtin_neon_vgetq_lane_f32:
- case AArch64::BI__builtin_neon_vgetq_lane_f64:
- return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
- case AArch64::BI__builtin_neon_vset_lane_i8:
- case AArch64::BI__builtin_neon_vset_lane_i16:
- case AArch64::BI__builtin_neon_vset_lane_i32:
- case AArch64::BI__builtin_neon_vset_lane_i64:
- case AArch64::BI__builtin_neon_vset_lane_f32:
- case AArch64::BI__builtin_neon_vset_lane_f64:
- case AArch64::BI__builtin_neon_vsetq_lane_i8:
- case AArch64::BI__builtin_neon_vsetq_lane_i16:
- case AArch64::BI__builtin_neon_vsetq_lane_i32:
- case AArch64::BI__builtin_neon_vsetq_lane_i64:
- case AArch64::BI__builtin_neon_vsetq_lane_f32:
- case AArch64::BI__builtin_neon_vsetq_lane_f64:
- return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);
- // Crypto
- case AArch64::BI__builtin_neon_vsha1h_u32:
- Int = Intrinsic::arm_neon_sha1h;
- s = "sha1h"; OverloadInt = true; break;
- case AArch64::BI__builtin_neon_vsha1cq_u32:
- Int = Intrinsic::aarch64_neon_sha1c;
- s = "sha1c"; break;
- case AArch64::BI__builtin_neon_vsha1pq_u32:
- Int = Intrinsic::aarch64_neon_sha1p;
- s = "sha1p"; break;
- case AArch64::BI__builtin_neon_vsha1mq_u32:
- Int = Intrinsic::aarch64_neon_sha1m;
- s = "sha1m"; break;
- // Scalar Add
- case AArch64::BI__builtin_neon_vaddd_s64:
- Int = Intrinsic::aarch64_neon_vaddds;
- s = "vaddds"; break;
- case AArch64::BI__builtin_neon_vaddd_u64:
- Int = Intrinsic::aarch64_neon_vadddu;
- s = "vadddu"; break;
- // Scalar Sub
- case AArch64::BI__builtin_neon_vsubd_s64:
- Int = Intrinsic::aarch64_neon_vsubds;
- s = "vsubds"; break;
- case AArch64::BI__builtin_neon_vsubd_u64:
- Int = Intrinsic::aarch64_neon_vsubdu;
- s = "vsubdu"; break;
- // Scalar Saturating Add
- case AArch64::BI__builtin_neon_vqaddb_s8:
- case AArch64::BI__builtin_neon_vqaddh_s16:
- case AArch64::BI__builtin_neon_vqadds_s32:
- case AArch64::BI__builtin_neon_vqaddd_s64:
- Int = Intrinsic::arm_neon_vqadds;
- s = "vqadds"; OverloadInt = true; break;
- case AArch64::BI__builtin_neon_vqaddb_u8:
- case AArch64::BI__builtin_neon_vqaddh_u16:
- case AArch64::BI__builtin_neon_vqadds_u32:
- case AArch64::BI__builtin_neon_vqaddd_u64:
- Int = Intrinsic::arm_neon_vqaddu;
- s = "vqaddu"; OverloadInt = true; break;
- // Scalar Saturating Sub
- case AArch64::BI__builtin_neon_vqsubb_s8:
- case AArch64::BI__builtin_neon_vqsubh_s16:
- case AArch64::BI__builtin_neon_vqsubs_s32:
- case AArch64::BI__builtin_neon_vqsubd_s64:
- Int = Intrinsic::arm_neon_vqsubs;
- s = "vqsubs"; OverloadInt = true; break;
- case AArch64::BI__builtin_neon_vqsubb_u8:
- case AArch64::BI__builtin_neon_vqsubh_u16:
- case AArch64::BI__builtin_neon_vqsubs_u32:
- case AArch64::BI__builtin_neon_vqsubd_u64:
- Int = Intrinsic::arm_neon_vqsubu;
- s = "vqsubu"; OverloadInt = true; break;
- // Scalar Shift Left
- case AArch64::BI__builtin_neon_vshld_s64:
- Int = Intrinsic::aarch64_neon_vshlds;
- s = "vshlds"; break;
- case AArch64::BI__builtin_neon_vshld_u64:
- Int = Intrinsic::aarch64_neon_vshldu;
- s = "vshldu"; break;
- // Scalar Saturating Shift Left
- case AArch64::BI__builtin_neon_vqshlb_s8:
- case AArch64::BI__builtin_neon_vqshlh_s16:
- case AArch64::BI__builtin_neon_vqshls_s32:
- case AArch64::BI__builtin_neon_vqshld_s64:
- Int = Intrinsic::aarch64_neon_vqshls;
- s = "vqshls"; OverloadInt = true; break;
- case AArch64::BI__builtin_neon_vqshlb_u8:
- case AArch64::BI__builtin_neon_vqshlh_u16:
- case AArch64::BI__builtin_neon_vqshls_u32:
- case AArch64::BI__builtin_neon_vqshld_u64:
- Int = Intrinsic::aarch64_neon_vqshlu;
- s = "vqshlu"; OverloadInt = true; break;
- // Scalar Rouding Shift Left
- case AArch64::BI__builtin_neon_vrshld_s64:
- Int = Intrinsic::aarch64_neon_vrshlds;
- s = "vrshlds"; break;
- case AArch64::BI__builtin_neon_vrshld_u64:
- Int = Intrinsic::aarch64_neon_vrshldu;
- s = "vrshldu"; break;
- // Scalar Saturating Rouding Shift Left
- case AArch64::BI__builtin_neon_vqrshlb_s8:
- case AArch64::BI__builtin_neon_vqrshlh_s16:
- case AArch64::BI__builtin_neon_vqrshls_s32:
- case AArch64::BI__builtin_neon_vqrshld_s64:
- Int = Intrinsic::aarch64_neon_vqrshls;
- s = "vqrshls"; OverloadInt = true; break;
- case AArch64::BI__builtin_neon_vqrshlb_u8:
- case AArch64::BI__builtin_neon_vqrshlh_u16:
- case AArch64::BI__builtin_neon_vqrshls_u32:
- case AArch64::BI__builtin_neon_vqrshld_u64:
- Int = Intrinsic::aarch64_neon_vqrshlu;
- s = "vqrshlu"; OverloadInt = true; break;
- // Scalar Reduce Pairwise Add
- case AArch64::BI__builtin_neon_vpaddd_s64:
- case AArch64::BI__builtin_neon_vpaddd_u64:
- Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd";
- break;
- case AArch64::BI__builtin_neon_vpadds_f32:
- Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd";
- break;
- case AArch64::BI__builtin_neon_vpaddd_f64:
- Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq";
- break;
- // Scalar Reduce Pairwise Floating Point Max
- case AArch64::BI__builtin_neon_vpmaxs_f32:
- Int = Intrinsic::aarch64_neon_vpmax; s = "vpmax";
- break;
- case AArch64::BI__builtin_neon_vpmaxqd_f64:
- Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq";
- break;
- // Scalar Reduce Pairwise Floating Point Min
- case AArch64::BI__builtin_neon_vpmins_f32:
- Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin";
- break;
- case AArch64::BI__builtin_neon_vpminqd_f64:
- Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq";
- break;
- // Scalar Reduce Pairwise Floating Point Maxnm
- case AArch64::BI__builtin_neon_vpmaxnms_f32:
- Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm";
- break;
- case AArch64::BI__builtin_neon_vpmaxnmqd_f64:
- Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq";
- break;
- // Scalar Reduce Pairwise Floating Point Minnm
- case AArch64::BI__builtin_neon_vpminnms_f32:
- Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm";
- break;
- case AArch64::BI__builtin_neon_vpminnmqd_f64:
- Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq";
- break;
- // The followings are intrinsics with scalar results generated AcrossVec vectors
- case AArch64::BI__builtin_neon_vaddlv_s8:
- case AArch64::BI__builtin_neon_vaddlv_s16:
- case AArch64::BI__builtin_neon_vaddlvq_s8:
- case AArch64::BI__builtin_neon_vaddlvq_s16:
- case AArch64::BI__builtin_neon_vaddlvq_s32:
- Int = Intrinsic::aarch64_neon_saddlv;
- AcrossVec = true; ExtendEle = true; s = "saddlv"; break;
- case AArch64::BI__builtin_neon_vaddlv_u8:
- case AArch64::BI__builtin_neon_vaddlv_u16:
- case AArch64::BI__builtin_neon_vaddlvq_u8:
- case AArch64::BI__builtin_neon_vaddlvq_u16:
- case AArch64::BI__builtin_neon_vaddlvq_u32:
- Int = Intrinsic::aarch64_neon_uaddlv;
- AcrossVec = true; ExtendEle = true; s = "uaddlv"; break;
- case AArch64::BI__builtin_neon_vmaxv_s8:
- case AArch64::BI__builtin_neon_vmaxv_s16:
- case AArch64::BI__builtin_neon_vmaxvq_s8:
- case AArch64::BI__builtin_neon_vmaxvq_s16:
- case AArch64::BI__builtin_neon_vmaxvq_s32:
- Int = Intrinsic::aarch64_neon_smaxv;
- AcrossVec = true; ExtendEle = false; s = "smaxv"; break;
- case AArch64::BI__builtin_neon_vmaxv_u8:
- case AArch64::BI__builtin_neon_vmaxv_u16:
- case AArch64::BI__builtin_neon_vmaxvq_u8:
- case AArch64::BI__builtin_neon_vmaxvq_u16:
- case AArch64::BI__builtin_neon_vmaxvq_u32:
- Int = Intrinsic::aarch64_neon_umaxv;
- AcrossVec = true; ExtendEle = false; s = "umaxv"; break;
- case AArch64::BI__builtin_neon_vminv_s8:
- case AArch64::BI__builtin_neon_vminv_s16:
- case AArch64::BI__builtin_neon_vminvq_s8:
- case AArch64::BI__builtin_neon_vminvq_s16:
- case AArch64::BI__builtin_neon_vminvq_s32:
- Int = Intrinsic::aarch64_neon_sminv;
- AcrossVec = true; ExtendEle = false; s = "sminv"; break;
- case AArch64::BI__builtin_neon_vminv_u8:
- case AArch64::BI__builtin_neon_vminv_u16:
- case AArch64::BI__builtin_neon_vminvq_u8:
- case AArch64::BI__builtin_neon_vminvq_u16:
- case AArch64::BI__builtin_neon_vminvq_u32:
- Int = Intrinsic::aarch64_neon_uminv;
- AcrossVec = true; ExtendEle = false; s = "uminv"; break;
- case AArch64::BI__builtin_neon_vaddv_s8:
- case AArch64::BI__builtin_neon_vaddv_s16:
- case AArch64::BI__builtin_neon_vaddvq_s8:
- case AArch64::BI__builtin_neon_vaddvq_s16:
- case AArch64::BI__builtin_neon_vaddvq_s32:
- case AArch64::BI__builtin_neon_vaddvq_s64:
- case AArch64::BI__builtin_neon_vaddv_u8:
- case AArch64::BI__builtin_neon_vaddv_u16:
- case AArch64::BI__builtin_neon_vaddvq_u8:
- case AArch64::BI__builtin_neon_vaddvq_u16:
- case AArch64::BI__builtin_neon_vaddvq_u32:
- case AArch64::BI__builtin_neon_vaddvq_u64:
- case AArch64::BI__builtin_neon_vaddv_f32:
- case AArch64::BI__builtin_neon_vaddvq_f32:
- case AArch64::BI__builtin_neon_vaddvq_f64:
- Int = Intrinsic::aarch64_neon_vaddv;
- AcrossVec = true; ExtendEle = false; s = "vaddv"; break;
- case AArch64::BI__builtin_neon_vmaxv_f32:
- case AArch64::BI__builtin_neon_vmaxvq_f32:
- case AArch64::BI__builtin_neon_vmaxvq_f64:
- Int = Intrinsic::aarch64_neon_vmaxv;
- AcrossVec = true; ExtendEle = false; s = "vmaxv"; break;
- case AArch64::BI__builtin_neon_vminv_f32:
- case AArch64::BI__builtin_neon_vminvq_f32:
- case AArch64::BI__builtin_neon_vminvq_f64:
- Int = Intrinsic::aarch64_neon_vminv;
- AcrossVec = true; ExtendEle = false; s = "vminv"; break;
- case AArch64::BI__builtin_neon_vmaxnmv_f32:
- case AArch64::BI__builtin_neon_vmaxnmvq_f32:
- case AArch64::BI__builtin_neon_vmaxnmvq_f64:
- Int = Intrinsic::aarch64_neon_vmaxnmv;
- AcrossVec = true; ExtendEle = false; s = "vmaxnmv"; break;
- case AArch64::BI__builtin_neon_vminnmv_f32:
- case AArch64::BI__builtin_neon_vminnmvq_f32:
- case AArch64::BI__builtin_neon_vminnmvq_f64:
- Int = Intrinsic::aarch64_neon_vminnmv;
- AcrossVec = true; ExtendEle = false; s = "vminnmv"; break;
- // Scalar Integer Saturating Doubling Multiply Half High
- case AArch64::BI__builtin_neon_vqdmulhh_s16:
- case AArch64::BI__builtin_neon_vqdmulhs_s32:
- Int = Intrinsic::arm_neon_vqdmulh;
- s = "vqdmulh"; OverloadInt = true; break;
- // Scalar Integer Saturating Rounding Doubling Multiply Half High
- case AArch64::BI__builtin_neon_vqrdmulhh_s16:
- case AArch64::BI__builtin_neon_vqrdmulhs_s32:
- Int = Intrinsic::arm_neon_vqrdmulh;
- s = "vqrdmulh"; OverloadInt = true; break;
- // Scalar Floating-point Reciprocal Step and
- case AArch64::BI__builtin_neon_vrecpss_f32:
- case AArch64::BI__builtin_neon_vrecpsd_f64:
- Int = Intrinsic::arm_neon_vrecps;
- s = "vrecps"; OverloadInt = true; break;
- // Scalar Floating-point Reciprocal Square Root Step
- case AArch64::BI__builtin_neon_vrsqrtss_f32:
- case AArch64::BI__builtin_neon_vrsqrtsd_f64:
- Int = Intrinsic::arm_neon_vrsqrts;
- s = "vrsqrts"; OverloadInt = true; break;
- // Scalar Signed Integer Convert To Floating-point
- case AArch64::BI__builtin_neon_vcvts_f32_s32:
- Int = Intrinsic::aarch64_neon_vcvtf32_s32,
- s = "vcvtf"; OverloadInt = false; break;
- case AArch64::BI__builtin_neon_vcvtd_f64_s64:
- Int = Intrinsic::aarch64_neon_vcvtf64_s64,
- s = "vcvtf"; OverloadInt = false; break;
- // Scalar Unsigned Integer Convert To Floating-point
- case AArch64::BI__builtin_neon_vcvts_f32_u32:
- Int = Intrinsic::aarch64_neon_vcvtf32_u32,
- s = "vcvtf"; OverloadInt = false; break;
- case AArch64::BI__builtin_neon_vcvtd_f64_u64:
- Int = Intrinsic::aarch64_neon_vcvtf64_u64,
- s = "vcvtf"; OverloadInt = false; break;
- // Scalar Floating-point Converts
- case AArch64::BI__builtin_neon_vcvtxd_f32_f64:
- Int = Intrinsic::aarch64_neon_fcvtxn;
- s = "vcvtxn"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvtas_s32_f32:
- case AArch64::BI__builtin_neon_vcvtad_s64_f64:
- Int = Intrinsic::aarch64_neon_fcvtas;
- s = "vcvtas"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvtas_u32_f32:
- case AArch64::BI__builtin_neon_vcvtad_u64_f64:
- Int = Intrinsic::aarch64_neon_fcvtau;
- s = "vcvtau"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvtms_s32_f32:
- case AArch64::BI__builtin_neon_vcvtmd_s64_f64:
- Int = Intrinsic::aarch64_neon_fcvtms;
- s = "vcvtms"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvtms_u32_f32:
- case AArch64::BI__builtin_neon_vcvtmd_u64_f64:
- Int = Intrinsic::aarch64_neon_fcvtmu;
- s = "vcvtmu"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvtns_s32_f32:
- case AArch64::BI__builtin_neon_vcvtnd_s64_f64:
- Int = Intrinsic::aarch64_neon_fcvtns;
- s = "vcvtns"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvtns_u32_f32:
- case AArch64::BI__builtin_neon_vcvtnd_u64_f64:
- Int = Intrinsic::aarch64_neon_fcvtnu;
- s = "vcvtnu"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvtps_s32_f32:
- case AArch64::BI__builtin_neon_vcvtpd_s64_f64:
- Int = Intrinsic::aarch64_neon_fcvtps;
- s = "vcvtps"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvtps_u32_f32:
- case AArch64::BI__builtin_neon_vcvtpd_u64_f64:
- Int = Intrinsic::aarch64_neon_fcvtpu;
- s = "vcvtpu"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvts_s32_f32:
- case AArch64::BI__builtin_neon_vcvtd_s64_f64:
- Int = Intrinsic::aarch64_neon_fcvtzs;
- s = "vcvtzs"; OverloadCvtInt = true; break;
- case AArch64::BI__builtin_neon_vcvts_u32_f32:
- case AArch64::BI__builtin_neon_vcvtd_u64_f64:
- Int = Intrinsic::aarch64_neon_fcvtzu;
- s = "vcvtzu"; OverloadCvtInt = true; break;
- // Scalar Floating-point Reciprocal Estimate
- case AArch64::BI__builtin_neon_vrecpes_f32:
- case AArch64::BI__builtin_neon_vrecped_f64:
- Int = Intrinsic::arm_neon_vrecpe;
- s = "vrecpe"; OverloadInt = true; break;
- // Scalar Floating-point Reciprocal Exponent
- case AArch64::BI__builtin_neon_vrecpxs_f32:
- case AArch64::BI__builtin_neon_vrecpxd_f64:
- Int = Intrinsic::aarch64_neon_vrecpx;
- s = "vrecpx"; OverloadInt = true; break;
- // Scalar Floating-point Reciprocal Square Root Estimate
- case AArch64::BI__builtin_neon_vrsqrtes_f32:
- case AArch64::BI__builtin_neon_vrsqrted_f64:
- Int = Intrinsic::arm_neon_vrsqrte;
- s = "vrsqrte"; OverloadInt = true; break;
- // Scalar Compare Equal
- case AArch64::BI__builtin_neon_vceqd_s64:
- case AArch64::BI__builtin_neon_vceqd_u64:
- Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
- OverloadCmpInt = true; break;
- // Scalar Compare Equal To Zero
- case AArch64::BI__builtin_neon_vceqzd_s64:
- case AArch64::BI__builtin_neon_vceqzd_u64:
- Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
+ case NEON::BI__builtin_neon_vget_lane_i8:
+ case NEON::BI__builtin_neon_vget_lane_i16:
+ case NEON::BI__builtin_neon_vget_lane_i32:
+ case NEON::BI__builtin_neon_vget_lane_i64:
+ case NEON::BI__builtin_neon_vget_lane_f32:
+ case NEON::BI__builtin_neon_vget_lane_f64:
+ case NEON::BI__builtin_neon_vgetq_lane_i8:
+ case NEON::BI__builtin_neon_vgetq_lane_i16:
+ case NEON::BI__builtin_neon_vgetq_lane_i32:
+ case NEON::BI__builtin_neon_vgetq_lane_i64:
+ case NEON::BI__builtin_neon_vgetq_lane_f32:
+ case NEON::BI__builtin_neon_vgetq_lane_f64:
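+    // Every vget_lane variant lowers identically, so defer to the ARM path
+    // via a single representative builtin ID.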
+ return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vget_lane_i8, E);
+ case NEON::BI__builtin_neon_vset_lane_i8:
+ case NEON::BI__builtin_neon_vset_lane_i16:
+ case NEON::BI__builtin_neon_vset_lane_i32:
+ case NEON::BI__builtin_neon_vset_lane_i64:
+ case NEON::BI__builtin_neon_vset_lane_f32:
+ case NEON::BI__builtin_neon_vset_lane_f64:
+ case NEON::BI__builtin_neon_vsetq_lane_i8:
+ case NEON::BI__builtin_neon_vsetq_lane_i16:
+ case NEON::BI__builtin_neon_vsetq_lane_i32:
+ case NEON::BI__builtin_neon_vsetq_lane_i64:
+ case NEON::BI__builtin_neon_vsetq_lane_f32:
+ case NEON::BI__builtin_neon_vsetq_lane_f64:
+ return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E);
+
+ case NEON::BI__builtin_neon_vceqzd_s64:
+ case NEON::BI__builtin_neon_vceqzd_u64:
+ case NEON::BI__builtin_neon_vcgezd_s64:
+ case NEON::BI__builtin_neon_vcgtzd_s64:
+ case NEON::BI__builtin_neon_vclezd_s64:
+ case NEON::BI__builtin_neon_vcltzd_s64:
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadCmpInt = true; break;
- // Scalar Compare Greater Than or Equal
- case AArch64::BI__builtin_neon_vcged_s64:
- Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
- OverloadCmpInt = true; break;
- case AArch64::BI__builtin_neon_vcged_u64:
- Int = Intrinsic::aarch64_neon_vchs; s = "vcge";
- OverloadCmpInt = true; break;
- // Scalar Compare Greater Than or Equal To Zero
- case AArch64::BI__builtin_neon_vcgezd_s64:
- Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadCmpInt = true; break;
- // Scalar Compare Greater Than
- case AArch64::BI__builtin_neon_vcgtd_s64:
- Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
- OverloadCmpInt = true; break;
- case AArch64::BI__builtin_neon_vcgtd_u64:
- Int = Intrinsic::aarch64_neon_vchi; s = "vcgt";
- OverloadCmpInt = true; break;
- // Scalar Compare Greater Than Zero
- case AArch64::BI__builtin_neon_vcgtzd_s64:
- Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadCmpInt = true; break;
- // Scalar Compare Less Than or Equal
- case AArch64::BI__builtin_neon_vcled_s64:
- Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
- OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
- case AArch64::BI__builtin_neon_vcled_u64:
- Int = Intrinsic::aarch64_neon_vchs; s = "vchs";
- OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
- // Scalar Compare Less Than or Equal To Zero
- case AArch64::BI__builtin_neon_vclezd_s64:
- Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadCmpInt = true; break;
- // Scalar Compare Less Than
- case AArch64::BI__builtin_neon_vcltd_s64:
- Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
- OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
- case AArch64::BI__builtin_neon_vcltd_u64:
- Int = Intrinsic::aarch64_neon_vchi; s = "vchi";
- OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
- // Scalar Compare Less Than Zero
- case AArch64::BI__builtin_neon_vcltzd_s64:
- Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadCmpInt = true; break;
- // Scalar Floating-point Compare Equal
- case AArch64::BI__builtin_neon_vceqs_f32:
- case AArch64::BI__builtin_neon_vceqd_f64:
- Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
- OverloadCmpInt = true; break;
- // Scalar Floating-point Compare Equal To Zero
- case AArch64::BI__builtin_neon_vceqzs_f32:
- case AArch64::BI__builtin_neon_vceqzd_f64:
- Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
+ break;
+ case NEON::BI__builtin_neon_vceqzs_f32:
+ case NEON::BI__builtin_neon_vceqzd_f64:
+ case NEON::BI__builtin_neon_vcgezs_f32:
+ case NEON::BI__builtin_neon_vcgezd_f64:
+ case NEON::BI__builtin_neon_vcgtzs_f32:
+ case NEON::BI__builtin_neon_vcgtzd_f64:
+ case NEON::BI__builtin_neon_vclezs_f32:
+ case NEON::BI__builtin_neon_vclezd_f64:
+ case NEON::BI__builtin_neon_vcltzs_f32:
+ case NEON::BI__builtin_neon_vcltzd_f64:
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
- IsFpCmpZInt = true;
- OverloadCmpInt = true; break;
- // Scalar Floating-point Compare Greater Than Or Equal
- case AArch64::BI__builtin_neon_vcges_f32:
- case AArch64::BI__builtin_neon_vcged_f64:
- Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
- OverloadCmpInt = true; break;
- // Scalar Floating-point Compare Greater Than Or Equal To Zero
- case AArch64::BI__builtin_neon_vcgezs_f32:
- case AArch64::BI__builtin_neon_vcgezd_f64:
- Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
- IsFpCmpZInt = true;
- OverloadCmpInt = true; break;
- // Scalar Floating-point Compare Greather Than
- case AArch64::BI__builtin_neon_vcgts_f32:
- case AArch64::BI__builtin_neon_vcgtd_f64:
- Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
- OverloadCmpInt = true; break;
- // Scalar Floating-point Compare Greather Than Zero
- case AArch64::BI__builtin_neon_vcgtzs_f32:
- case AArch64::BI__builtin_neon_vcgtzd_f64:
- Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
- IsFpCmpZInt = true;
- OverloadCmpInt = true; break;
- // Scalar Floating-point Compare Less Than or Equal
- case AArch64::BI__builtin_neon_vcles_f32:
- case AArch64::BI__builtin_neon_vcled_f64:
- Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
- OverloadCmpInt = true; break;
- // Scalar Floating-point Compare Less Than Or Equal To Zero
- case AArch64::BI__builtin_neon_vclezs_f32:
- case AArch64::BI__builtin_neon_vclezd_f64:
- Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
- IsFpCmpZInt = true;
- OverloadCmpInt = true; break;
- // Scalar Floating-point Compare Less Than Zero
- case AArch64::BI__builtin_neon_vclts_f32:
- case AArch64::BI__builtin_neon_vcltd_f64:
- Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
- OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
- // Scalar Floating-point Compare Less Than Zero
- case AArch64::BI__builtin_neon_vcltzs_f32:
- case AArch64::BI__builtin_neon_vcltzd_f64:
- Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
- IsFpCmpZInt = true;
- OverloadCmpInt = true; break;
- // Scalar Floating-point Absolute Compare Greater Than Or Equal
- case AArch64::BI__builtin_neon_vcages_f32:
- case AArch64::BI__builtin_neon_vcaged_f64:
- Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
- OverloadCmpInt = true; break;
- // Scalar Floating-point Absolute Compare Greater Than
- case AArch64::BI__builtin_neon_vcagts_f32:
- case AArch64::BI__builtin_neon_vcagtd_f64:
- Int = Intrinsic::aarch64_neon_vcagt; s = "vcagt";
- OverloadCmpInt = true; break;
- // Scalar Floating-point Absolute Compare Less Than Or Equal
- case AArch64::BI__builtin_neon_vcales_f32:
- case AArch64::BI__builtin_neon_vcaled_f64:
- Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
- OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
- // Scalar Floating-point Absolute Compare Less Than
- case AArch64::BI__builtin_neon_vcalts_f32:
- case AArch64::BI__builtin_neon_vcaltd_f64:
- Int = Intrinsic::aarch64_neon_vcagt; s = "vcalt";
- OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
- // Scalar Compare Bitwise Test Bits
- case AArch64::BI__builtin_neon_vtstd_s64:
- case AArch64::BI__builtin_neon_vtstd_u64:
- Int = Intrinsic::aarch64_neon_vtstd; s = "vtst";
- OverloadCmpInt = true; break;
- // Scalar Absolute Value
- case AArch64::BI__builtin_neon_vabsd_s64:
- Int = Intrinsic::aarch64_neon_vabs;
- s = "vabs"; OverloadInt = false; break;
- // Scalar Absolute Difference
- case AArch64::BI__builtin_neon_vabds_f32:
- case AArch64::BI__builtin_neon_vabdd_f64:
- Int = Intrinsic::aarch64_neon_vabd;
- s = "vabd"; OverloadInt = true; break;
- // Scalar Signed Saturating Absolute Value
- case AArch64::BI__builtin_neon_vqabsb_s8:
- case AArch64::BI__builtin_neon_vqabsh_s16:
- case AArch64::BI__builtin_neon_vqabss_s32:
- case AArch64::BI__builtin_neon_vqabsd_s64:
- Int = Intrinsic::arm_neon_vqabs;
- s = "vqabs"; OverloadInt = true; break;
- // Scalar Negate
- case AArch64::BI__builtin_neon_vnegd_s64:
- Int = Intrinsic::aarch64_neon_vneg;
- s = "vneg"; OverloadInt = false; break;
- // Scalar Signed Saturating Negate
- case AArch64::BI__builtin_neon_vqnegb_s8:
- case AArch64::BI__builtin_neon_vqnegh_s16:
- case AArch64::BI__builtin_neon_vqnegs_s32:
- case AArch64::BI__builtin_neon_vqnegd_s64:
- Int = Intrinsic::arm_neon_vqneg;
- s = "vqneg"; OverloadInt = true; break;
- // Scalar Signed Saturating Accumulated of Unsigned Value
- case AArch64::BI__builtin_neon_vuqaddb_s8:
- case AArch64::BI__builtin_neon_vuqaddh_s16:
- case AArch64::BI__builtin_neon_vuqadds_s32:
- case AArch64::BI__builtin_neon_vuqaddd_s64:
- Int = Intrinsic::aarch64_neon_vuqadd;
- s = "vuqadd"; OverloadInt = true; break;
- // Scalar Unsigned Saturating Accumulated of Signed Value
- case AArch64::BI__builtin_neon_vsqaddb_u8:
- case AArch64::BI__builtin_neon_vsqaddh_u16:
- case AArch64::BI__builtin_neon_vsqadds_u32:
- case AArch64::BI__builtin_neon_vsqaddd_u64:
- Int = Intrinsic::aarch64_neon_vsqadd;
- s = "vsqadd"; OverloadInt = true; break;
- // Signed Saturating Doubling Multiply-Add Long
- case AArch64::BI__builtin_neon_vqdmlalh_s16:
- case AArch64::BI__builtin_neon_vqdmlals_s32:
- Int = Intrinsic::aarch64_neon_vqdmlal;
- s = "vqdmlal"; OverloadWideInt = true; break;
- // Signed Saturating Doubling Multiply-Subtract Long
- case AArch64::BI__builtin_neon_vqdmlslh_s16:
- case AArch64::BI__builtin_neon_vqdmlsls_s32:
- Int = Intrinsic::aarch64_neon_vqdmlsl;
- s = "vqdmlsl"; OverloadWideInt = true; break;
- // Signed Saturating Doubling Multiply Long
- case AArch64::BI__builtin_neon_vqdmullh_s16:
- case AArch64::BI__builtin_neon_vqdmulls_s32:
- Int = Intrinsic::arm_neon_vqdmull;
- s = "vqdmull"; OverloadWideInt = true; break;
- // Scalar Signed Saturating Extract Unsigned Narrow
- case AArch64::BI__builtin_neon_vqmovunh_s16:
- case AArch64::BI__builtin_neon_vqmovuns_s32:
- case AArch64::BI__builtin_neon_vqmovund_s64:
- Int = Intrinsic::arm_neon_vqmovnsu;
- s = "vqmovun"; OverloadNarrowInt = true; break;
- // Scalar Signed Saturating Extract Narrow
- case AArch64::BI__builtin_neon_vqmovnh_s16:
- case AArch64::BI__builtin_neon_vqmovns_s32:
- case AArch64::BI__builtin_neon_vqmovnd_s64:
- Int = Intrinsic::arm_neon_vqmovns;
- s = "vqmovn"; OverloadNarrowInt = true; break;
- // Scalar Unsigned Saturating Extract Narrow
- case AArch64::BI__builtin_neon_vqmovnh_u16:
- case AArch64::BI__builtin_neon_vqmovns_u32:
- case AArch64::BI__builtin_neon_vqmovnd_u64:
- Int = Intrinsic::arm_neon_vqmovnu;
- s = "vqmovn"; OverloadNarrowInt = true; break;
- // Scalar Signed Shift Right (Immediate)
- case AArch64::BI__builtin_neon_vshrd_n_s64:
- Int = Intrinsic::aarch64_neon_vshrds_n;
- s = "vsshr"; OverloadInt = false; break;
- // Scalar Unsigned Shift Right (Immediate)
- case AArch64::BI__builtin_neon_vshrd_n_u64:
- Int = Intrinsic::aarch64_neon_vshrdu_n;
- s = "vushr"; OverloadInt = false; break;
- // Scalar Signed Rounding Shift Right (Immediate)
- case AArch64::BI__builtin_neon_vrshrd_n_s64:
- Int = Intrinsic::aarch64_neon_vsrshr;
- s = "vsrshr"; OverloadInt = true; break;
- // Scalar Unsigned Rounding Shift Right (Immediate)
- case AArch64::BI__builtin_neon_vrshrd_n_u64:
- Int = Intrinsic::aarch64_neon_vurshr;
- s = "vurshr"; OverloadInt = true; break;
- // Scalar Signed Shift Right and Accumulate (Immediate)
- case AArch64::BI__builtin_neon_vsrad_n_s64:
- Int = Intrinsic::aarch64_neon_vsrads_n;
- s = "vssra"; OverloadInt = false; break;
- // Scalar Unsigned Shift Right and Accumulate (Immediate)
- case AArch64::BI__builtin_neon_vsrad_n_u64:
- Int = Intrinsic::aarch64_neon_vsradu_n;
- s = "vusra"; OverloadInt = false; break;
- // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
- case AArch64::BI__builtin_neon_vrsrad_n_s64:
- Int = Intrinsic::aarch64_neon_vrsrads_n;
- s = "vsrsra"; OverloadInt = false; break;
- // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
- case AArch64::BI__builtin_neon_vrsrad_n_u64:
- Int = Intrinsic::aarch64_neon_vrsradu_n;
- s = "vursra"; OverloadInt = false; break;
- // Scalar Signed/Unsigned Shift Left (Immediate)
- case AArch64::BI__builtin_neon_vshld_n_s64:
- case AArch64::BI__builtin_neon_vshld_n_u64:
- Int = Intrinsic::aarch64_neon_vshld_n;
- s = "vshl"; OverloadInt = false; break;
- // Signed Saturating Shift Left (Immediate)
- case AArch64::BI__builtin_neon_vqshlb_n_s8:
- case AArch64::BI__builtin_neon_vqshlh_n_s16:
- case AArch64::BI__builtin_neon_vqshls_n_s32:
- case AArch64::BI__builtin_neon_vqshld_n_s64:
- Int = Intrinsic::aarch64_neon_vqshls_n;
- s = "vsqshl"; OverloadInt = true; break;
- // Unsigned Saturating Shift Left (Immediate)
- case AArch64::BI__builtin_neon_vqshlb_n_u8:
- case AArch64::BI__builtin_neon_vqshlh_n_u16:
- case AArch64::BI__builtin_neon_vqshls_n_u32:
- case AArch64::BI__builtin_neon_vqshld_n_u64:
- Int = Intrinsic::aarch64_neon_vqshlu_n;
- s = "vuqshl"; OverloadInt = true; break;
- // Signed Saturating Shift Left Unsigned (Immediate)
- case AArch64::BI__builtin_neon_vqshlub_n_s8:
- case AArch64::BI__builtin_neon_vqshluh_n_s16:
- case AArch64::BI__builtin_neon_vqshlus_n_s32:
- case AArch64::BI__builtin_neon_vqshlud_n_s64:
- Int = Intrinsic::aarch64_neon_vsqshlu;
- s = "vsqshlu"; OverloadInt = true; break;
- // Shift Right And Insert (Immediate)
- case AArch64::BI__builtin_neon_vsrid_n_s64:
- case AArch64::BI__builtin_neon_vsrid_n_u64:
- Int = Intrinsic::aarch64_neon_vsri;
- s = "vsri"; OverloadInt = true; break;
- // Shift Left And Insert (Immediate)
- case AArch64::BI__builtin_neon_vslid_n_s64:
- case AArch64::BI__builtin_neon_vslid_n_u64:
- Int = Intrinsic::aarch64_neon_vsli;
- s = "vsli"; OverloadInt = true; break;
- // Signed Saturating Shift Right Narrow (Immediate)
- case AArch64::BI__builtin_neon_vqshrnh_n_s16:
- case AArch64::BI__builtin_neon_vqshrns_n_s32:
- case AArch64::BI__builtin_neon_vqshrnd_n_s64:
- Int = Intrinsic::aarch64_neon_vsqshrn;
- s = "vsqshrn"; OverloadInt = true; break;
- // Unsigned Saturating Shift Right Narrow (Immediate)
- case AArch64::BI__builtin_neon_vqshrnh_n_u16:
- case AArch64::BI__builtin_neon_vqshrns_n_u32:
- case AArch64::BI__builtin_neon_vqshrnd_n_u64:
- Int = Intrinsic::aarch64_neon_vuqshrn;
- s = "vuqshrn"; OverloadInt = true; break;
- // Signed Saturating Rounded Shift Right Narrow (Immediate)
- case AArch64::BI__builtin_neon_vqrshrnh_n_s16:
- case AArch64::BI__builtin_neon_vqrshrns_n_s32:
- case AArch64::BI__builtin_neon_vqrshrnd_n_s64:
- Int = Intrinsic::aarch64_neon_vsqrshrn;
- s = "vsqrshrn"; OverloadInt = true; break;
- // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
- case AArch64::BI__builtin_neon_vqrshrnh_n_u16:
- case AArch64::BI__builtin_neon_vqrshrns_n_u32:
- case AArch64::BI__builtin_neon_vqrshrnd_n_u64:
- Int = Intrinsic::aarch64_neon_vuqrshrn;
- s = "vuqrshrn"; OverloadInt = true; break;
- // Signed Saturating Shift Right Unsigned Narrow (Immediate)
- case AArch64::BI__builtin_neon_vqshrunh_n_s16:
- case AArch64::BI__builtin_neon_vqshruns_n_s32:
- case AArch64::BI__builtin_neon_vqshrund_n_s64:
- Int = Intrinsic::aarch64_neon_vsqshrun;
- s = "vsqshrun"; OverloadInt = true; break;
- // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
- case AArch64::BI__builtin_neon_vqrshrunh_n_s16:
- case AArch64::BI__builtin_neon_vqrshruns_n_s32:
- case AArch64::BI__builtin_neon_vqrshrund_n_s64:
- Int = Intrinsic::aarch64_neon_vsqrshrun;
- s = "vsqrshrun"; OverloadInt = true; break;
- // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
- case AArch64::BI__builtin_neon_vcvts_n_f32_s32:
- Int = Intrinsic::aarch64_neon_vcvtf32_n_s32;
- s = "vcvtf"; OverloadInt = false; break;
- case AArch64::BI__builtin_neon_vcvtd_n_f64_s64:
- Int = Intrinsic::aarch64_neon_vcvtf64_n_s64;
- s = "vcvtf"; OverloadInt = false; break;
- // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
- case AArch64::BI__builtin_neon_vcvts_n_f32_u32:
- Int = Intrinsic::aarch64_neon_vcvtf32_n_u32;
- s = "vcvtf"; OverloadInt = false; break;
- case AArch64::BI__builtin_neon_vcvtd_n_f64_u64:
- Int = Intrinsic::aarch64_neon_vcvtf64_n_u64;
- s = "vcvtf"; OverloadInt = false; break;
- // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
- case AArch64::BI__builtin_neon_vcvts_n_s32_f32:
- Int = Intrinsic::aarch64_neon_vcvts_n_s32_f32;
- s = "fcvtzs"; OverloadInt = false; break;
- case AArch64::BI__builtin_neon_vcvtd_n_s64_f64:
- Int = Intrinsic::aarch64_neon_vcvtd_n_s64_f64;
- s = "fcvtzs"; OverloadInt = false; break;
- // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
- case AArch64::BI__builtin_neon_vcvts_n_u32_f32:
- Int = Intrinsic::aarch64_neon_vcvts_n_u32_f32;
- s = "fcvtzu"; OverloadInt = false; break;
- case AArch64::BI__builtin_neon_vcvtd_n_u64_f64:
- Int = Intrinsic::aarch64_neon_vcvtd_n_u64_f64;
- s = "fcvtzu"; OverloadInt = false; break;
+ break;
}
- if (!Int)
+ // It didn't need any handling specific to the AArch64 backend, so defer to
+ // common code.
+ return EmitCommonNeonSISDBuiltinExpr(CGF, SISDInfo, Ops, E);
+}
+
+Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
+ unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
+ const char *NameHint, unsigned Modifier, const CallExpr *E,
+ SmallVectorImpl<llvm::Value *> &Ops, llvm::Value *Align) {
+ // Get the last argument, which specifies the vector type.
+ llvm::APSInt NeonTypeConst;
+ const Expr *Arg = E->getArg(E->getNumArgs() - 1);
+ if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
return 0;
- // AArch64 scalar builtin that returns scalar type
- // and should be mapped to AArch64 intrinsic that returns
- // one-element vector type.
- Function *F = 0;
- if (AcrossVec) {
- // Gen arg type
- const Expr *Arg = E->getArg(E->getNumArgs()-1);
- llvm::Type *Ty = CGF.ConvertType(Arg->getType());
- llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
- llvm::Type *ETy = VTy->getElementType();
- llvm::VectorType *RTy = llvm::VectorType::get(ETy, 1);
-
- if (ExtendEle) {
- assert(!ETy->isFloatingPointTy());
- RTy = llvm::VectorType::getExtendedElementVectorType(RTy);
+ // Determine the type of this overloaded NEON intrinsic.
+ NeonTypeFlags Type(NeonTypeConst.getZExtValue());
+ bool Usgn = Type.isUnsigned();
+ bool Quad = Type.isQuad();
+
+ llvm::VectorType *VTy = GetNeonType(this, Type);
+ llvm::Type *Ty = VTy;
+ if (!Ty)
+ return 0;
+
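+  // If this entry has distinct signed and unsigned intrinsics (UnsignedAlts),
+  // the primary intrinsic covers the unsigned case; fall back to the
+  // alternative intrinsic for signed element types.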
+ unsigned Int = LLVMIntrinsic;
+ if ((Modifier & UnsignedAlts) && !Usgn)
+ Int = AltLLVMIntrinsic;
+
+ switch (BuiltinID) {
+ default: break;
+ case NEON::BI__builtin_neon_vabs_v:
+ case NEON::BI__builtin_neon_vabsq_v:
+ if (VTy->getElementType()->isFloatingPointTy())
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
+ case NEON::BI__builtin_neon_vaddhn_v: {
+ llvm::VectorType *SrcTy =
+ llvm::VectorType::getExtendedElementVectorType(VTy);
+
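+    // The IR comments below show the vaddhn_s32 case: VTy is <4 x i16> and
+    // SrcTy is the widened <4 x i32>.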
+ // %sum = add <4 x i32> %lhs, %rhs
+ Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
+ Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
+
+ // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
+ Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
+ SrcTy->getScalarSizeInBits() / 2);
+ ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
+ Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
+
+ // %res = trunc <4 x i32> %high to <4 x i16>
+ return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
+ }
+ case NEON::BI__builtin_neon_vcale_v:
+ case NEON::BI__builtin_neon_vcaleq_v:
+ case NEON::BI__builtin_neon_vcalt_v:
+ case NEON::BI__builtin_neon_vcaltq_v:
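+    // vcale/vcalt are vcage/vcagt with the operands swapped; swap and fall
+    // through.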
+ std::swap(Ops[0], Ops[1]);
+ case NEON::BI__builtin_neon_vcage_v:
+ case NEON::BI__builtin_neon_vcageq_v:
+ case NEON::BI__builtin_neon_vcagt_v:
+ case NEON::BI__builtin_neon_vcagtq_v: {
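+    // The comparison takes floating-point operands but yields an integer
+    // mask, so the intrinsic is overloaded on both the result type and the
+    // float operand type.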
+ llvm::Type *VecFlt = llvm::VectorType::get(
+ VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
+ VTy->getNumElements());
+ llvm::Type *Tys[] = { VTy, VecFlt };
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
+ return EmitNeonCall(F, Ops, NameHint);
+ }
+ case NEON::BI__builtin_neon_vclz_v:
+ case NEON::BI__builtin_neon_vclzq_v:
+    // We generate a target-independent intrinsic, which needs a second
+    // argument indicating whether CLZ of zero is undefined; on ARM it is not.
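+    // e.g. vclz_s32 becomes llvm.ctlz.v2i32(%x, i1 false) when targeting ARM,
+    // where CLZ of zero is well defined.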
+ Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
+ break;
+ case NEON::BI__builtin_neon_vcvt_f32_v:
+ case NEON::BI__builtin_neon_vcvtq_f32_v:
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
+ return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
+ : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
+ case NEON::BI__builtin_neon_vcvt_n_f32_v:
+ case NEON::BI__builtin_neon_vcvt_n_f64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_f32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32,
+ false, Quad));
+ llvm::Type *Tys[2] = { FloatTy, Ty };
+ Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvt_n");
+ }
+ case NEON::BI__builtin_neon_vcvt_n_s32_v:
+ case NEON::BI__builtin_neon_vcvt_n_u32_v:
+ case NEON::BI__builtin_neon_vcvt_n_s64_v:
+ case NEON::BI__builtin_neon_vcvt_n_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_s32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_u32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_s64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32,
+ false, Quad));
+ llvm::Type *Tys[2] = { Ty, FloatTy };
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
+ return EmitNeonCall(F, Ops, "vcvt_n");
+ }
+ case NEON::BI__builtin_neon_vcvt_s32_v:
+ case NEON::BI__builtin_neon_vcvt_u32_v:
+ case NEON::BI__builtin_neon_vcvt_s64_v:
+ case NEON::BI__builtin_neon_vcvt_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_s32_v:
+ case NEON::BI__builtin_neon_vcvtq_u32_v:
+ case NEON::BI__builtin_neon_vcvtq_s64_v:
+ case NEON::BI__builtin_neon_vcvtq_u64_v: {
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32,
+ false, Quad));
+ Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
+ return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
+ : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
+ }
+ case NEON::BI__builtin_neon_vcvta_s32_v:
+ case NEON::BI__builtin_neon_vcvta_s64_v:
+ case NEON::BI__builtin_neon_vcvta_u32_v:
+ case NEON::BI__builtin_neon_vcvta_u64_v:
+ case NEON::BI__builtin_neon_vcvtaq_s32_v:
+ case NEON::BI__builtin_neon_vcvtaq_s64_v:
+ case NEON::BI__builtin_neon_vcvtaq_u32_v:
+ case NEON::BI__builtin_neon_vcvtaq_u64_v:
+ case NEON::BI__builtin_neon_vcvtn_s32_v:
+ case NEON::BI__builtin_neon_vcvtn_s64_v:
+ case NEON::BI__builtin_neon_vcvtn_u32_v:
+ case NEON::BI__builtin_neon_vcvtn_u64_v:
+ case NEON::BI__builtin_neon_vcvtnq_s32_v:
+ case NEON::BI__builtin_neon_vcvtnq_s64_v:
+ case NEON::BI__builtin_neon_vcvtnq_u32_v:
+ case NEON::BI__builtin_neon_vcvtnq_u64_v:
+ case NEON::BI__builtin_neon_vcvtp_s32_v:
+ case NEON::BI__builtin_neon_vcvtp_s64_v:
+ case NEON::BI__builtin_neon_vcvtp_u32_v:
+ case NEON::BI__builtin_neon_vcvtp_u64_v:
+ case NEON::BI__builtin_neon_vcvtpq_s32_v:
+ case NEON::BI__builtin_neon_vcvtpq_s64_v:
+ case NEON::BI__builtin_neon_vcvtpq_u32_v:
+ case NEON::BI__builtin_neon_vcvtpq_u64_v:
+ case NEON::BI__builtin_neon_vcvtm_s32_v:
+ case NEON::BI__builtin_neon_vcvtm_s64_v:
+ case NEON::BI__builtin_neon_vcvtm_u32_v:
+ case NEON::BI__builtin_neon_vcvtm_u64_v:
+ case NEON::BI__builtin_neon_vcvtmq_s32_v:
+ case NEON::BI__builtin_neon_vcvtmq_s64_v:
+ case NEON::BI__builtin_neon_vcvtmq_u32_v:
+ case NEON::BI__builtin_neon_vcvtmq_u64_v: {
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
+ llvm::Type *InTy =
+ GetNeonType(this,
+ NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32, false, Quad));
+ llvm::Type *Tys[2] = { Ty, InTy };
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
+ }
+ case NEON::BI__builtin_neon_vext_v:
+ case NEON::BI__builtin_neon_vextq_v: {
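+    // vext concatenates the two inputs and extracts a full vector starting at
+    // lane CV, so it maps onto a shufflevector with indices CV..CV+N-1.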
+ int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
+ SmallVector<Constant*, 16> Indices;
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
+ Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
+
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Value *SV = llvm::ConstantVector::get(Indices);
+ return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
+ }
+ case NEON::BI__builtin_neon_vfma_v:
+ case NEON::BI__builtin_neon_vfmaq_v: {
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+
+    // The NEON intrinsic puts the accumulator first, unlike LLVM's fma.
+ return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld1_v:
+ case NEON::BI__builtin_neon_vld1q_v:
+ Ops.push_back(Align);
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1");
+ case NEON::BI__builtin_neon_vld2_v:
+ case NEON::BI__builtin_neon_vld2q_v:
+ case NEON::BI__builtin_neon_vld3_v:
+ case NEON::BI__builtin_neon_vld3q_v:
+ case NEON::BI__builtin_neon_vld4_v:
+ case NEON::BI__builtin_neon_vld4q_v: {
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
+ Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint);
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld1_dup_v:
+ case NEON::BI__builtin_neon_vld1q_dup_v: {
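+    // Load a single element at the requested alignment and splat it across
+    // every lane of the result.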
+ Value *V = UndefValue::get(Ty);
+ Ty = llvm::PointerType::getUnqual(VTy->getElementType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ LoadInst *Ld = Builder.CreateLoad(Ops[0]);
+ Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
+ llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
+ Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
+ return EmitNeonSplat(Ops[0], CI);
+ }
+ case NEON::BI__builtin_neon_vld2_lane_v:
+ case NEON::BI__builtin_neon_vld2q_lane_v:
+ case NEON::BI__builtin_neon_vld3_lane_v:
+ case NEON::BI__builtin_neon_vld3q_lane_v:
+ case NEON::BI__builtin_neon_vld4_lane_v:
+ case NEON::BI__builtin_neon_vld4q_lane_v: {
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
+ for (unsigned I = 2; I < Ops.size() - 1; ++I)
+ Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
+ Ops.push_back(Align);
+ Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vmovl_v: {
+    llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
+ Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
+ if (Usgn)
+ return Builder.CreateZExt(Ops[0], Ty, "vmovl");
+ return Builder.CreateSExt(Ops[0], Ty, "vmovl");
+ }
+ case NEON::BI__builtin_neon_vmovn_v: {
+ llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
+ Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
+ return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
+ }
+ case NEON::BI__builtin_neon_vmull_v:
+ // FIXME: the integer vmull operations could be emitted in terms of pure
+ // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
+ // hoisting the exts outside loops. Until global ISel comes along that can
+    // see through such movement, this leads to bad CodeGen. So we need an
+ // intrinsic for now.
+ Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
+ Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
+ case NEON::BI__builtin_neon_vpadal_v:
+ case NEON::BI__builtin_neon_vpadalq_v: {
+ // The source operand type has twice as many elements of half the size.
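+    // e.g. for vpadalq_s16 the result type is <8 x i16> and NarrowTy is
+    // <16 x i8>.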
+ unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ llvm::Type *EltTy =
+ llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
+ llvm::Type *NarrowTy =
+ llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
+ llvm::Type *Tys[2] = { Ty, NarrowTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
+ }
+ case NEON::BI__builtin_neon_vpaddl_v:
+ case NEON::BI__builtin_neon_vpaddlq_v: {
+ // The source operand type has twice as many elements of half the size.
+ unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
+ llvm::Type *NarrowTy =
+ llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
+ llvm::Type *Tys[2] = { Ty, NarrowTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
+ }
+ case NEON::BI__builtin_neon_vqdmlal_v:
+ case NEON::BI__builtin_neon_vqdmlsl_v: {
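+    // Emit in two steps: the primary intrinsic performs the widening doubling
+    // multiply of the last two operands, then the alternative intrinsic
+    // accumulates (adds or subtracts) the product into Ops[0].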
+ SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
+ Value *Mul = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty),
+ MulOps, "vqdmlal");
+
+ SmallVector<Value *, 2> AccumOps;
+ AccumOps.push_back(Ops[0]);
+ AccumOps.push_back(Mul);
+ return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty),
+ AccumOps, NameHint);
+ }
+ case NEON::BI__builtin_neon_vqshl_n_v:
+ case NEON::BI__builtin_neon_vqshlq_n_v:
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
+ 1, false);
+ case NEON::BI__builtin_neon_vrecpe_v:
+ case NEON::BI__builtin_neon_vrecpeq_v:
+ case NEON::BI__builtin_neon_vrsqrte_v:
+ case NEON::BI__builtin_neon_vrsqrteq_v:
+ Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
+
+ case NEON::BI__builtin_neon_vshl_n_v:
+ case NEON::BI__builtin_neon_vshlq_n_v:
+ Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
+ return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
+ "vshl_n");
+ case NEON::BI__builtin_neon_vshll_n_v: {
+ llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
+ Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
+ if (Usgn)
+ Ops[0] = Builder.CreateZExt(Ops[0], VTy);
+ else
+ Ops[0] = Builder.CreateSExt(Ops[0], VTy);
+ Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
+ return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
+ }
+ case NEON::BI__builtin_neon_vshrn_n_v: {
+ llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
+ Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
+ Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
+ if (Usgn)
+ Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
+ else
+ Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
+ return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
+ }
+ case NEON::BI__builtin_neon_vshr_n_v:
+ case NEON::BI__builtin_neon_vshrq_n_v:
+ return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
+ case NEON::BI__builtin_neon_vst1_v:
+ case NEON::BI__builtin_neon_vst1q_v:
+ case NEON::BI__builtin_neon_vst2_v:
+ case NEON::BI__builtin_neon_vst2q_v:
+ case NEON::BI__builtin_neon_vst3_v:
+ case NEON::BI__builtin_neon_vst3q_v:
+ case NEON::BI__builtin_neon_vst4_v:
+ case NEON::BI__builtin_neon_vst4q_v:
+ case NEON::BI__builtin_neon_vst2_lane_v:
+ case NEON::BI__builtin_neon_vst2q_lane_v:
+ case NEON::BI__builtin_neon_vst3_lane_v:
+ case NEON::BI__builtin_neon_vst3q_lane_v:
+ case NEON::BI__builtin_neon_vst4_lane_v:
+ case NEON::BI__builtin_neon_vst4q_lane_v:
+ Ops.push_back(Align);
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
+ case NEON::BI__builtin_neon_vsubhn_v: {
+ llvm::VectorType *SrcTy =
+ llvm::VectorType::getExtendedElementVectorType(VTy);
+
+    // %dif = sub <4 x i32> %lhs, %rhs
+ Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
+ Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
+
+    // %high = lshr <4 x i32> %dif, <i32 16, i32 16, i32 16, i32 16>
+ Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
+ SrcTy->getScalarSizeInBits() / 2);
+ ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
+ Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
+
+ // %res = trunc <4 x i32> %high to <4 x i16>
+ return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
+ }
+ case NEON::BI__builtin_neon_vtrn_v:
+ case NEON::BI__builtin_neon_vtrnq_v: {
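+    // The builtin returns two vectors through the result pointer in Ops[0];
+    // build each half of the transpose with a shufflevector and store it into
+    // the corresponding slot.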
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Value *SV = 0;
+
+ for (unsigned vi = 0; vi != 2; ++vi) {
+ SmallVector<Constant*, 16> Indices;
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+ Indices.push_back(Builder.getInt32(i+vi));
+ Indices.push_back(Builder.getInt32(i+e+vi));
+ }
+ Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+ SV = llvm::ConstantVector::get(Indices);
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
+ SV = Builder.CreateStore(SV, Addr);
}
+ return SV;
+ }
+ case NEON::BI__builtin_neon_vtst_v:
+ case NEON::BI__builtin_neon_vtstq_v: {
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
+ Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
+ ConstantAggregateZero::get(Ty));
+ return Builder.CreateSExt(Ops[0], Ty, "vtst");
+ }
+ case NEON::BI__builtin_neon_vuzp_v:
+ case NEON::BI__builtin_neon_vuzpq_v: {
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Value *SV = 0;
- llvm::Type *Tys[2] = {RTy, VTy};
- F = CGF.CGM.getIntrinsic(Int, Tys);
- assert(E->getNumArgs() == 1);
- } else if (OverloadInt) {
- // Determine the type of this overloaded AArch64 intrinsic
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
- assert(VTy);
+ for (unsigned vi = 0; vi != 2; ++vi) {
+ SmallVector<Constant*, 16> Indices;
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
+ Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
- F = CGF.CGM.getIntrinsic(Int, VTy);
- } else if (OverloadWideInt || OverloadNarrowInt) {
- // Determine the type of this overloaded AArch64 intrinsic
- const Expr *Arg = E->getArg(E->getNumArgs()-1);
- llvm::Type *Ty = CGF.ConvertType(Arg->getType());
- llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
- llvm::VectorType *RTy = OverloadWideInt ?
- llvm::VectorType::getExtendedElementVectorType(VTy) :
- llvm::VectorType::getTruncatedElementVectorType(VTy);
- F = CGF.CGM.getIntrinsic(Int, RTy);
- } else if (OverloadCmpInt) {
- // Determine the types of this overloaded AArch64 intrinsic
- SmallVector<llvm::Type *, 3> Tys;
- const Expr *Arg = E->getArg(E->getNumArgs()-1);
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
- Tys.push_back(VTy);
- Ty = CGF.ConvertType(Arg->getType());
- VTy = llvm::VectorType::get(Ty, 1);
- Tys.push_back(VTy);
- if(IsFpCmpZInt)
- VTy = llvm::VectorType::get(CGF.FloatTy, 1);
- Tys.push_back(VTy);
+ Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+ SV = llvm::ConstantVector::get(Indices);
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
+ SV = Builder.CreateStore(SV, Addr);
+ }
+ return SV;
+ }
+ case NEON::BI__builtin_neon_vzip_v:
+ case NEON::BI__builtin_neon_vzipq_v: {
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Value *SV = 0;
- F = CGF.CGM.getIntrinsic(Int, Tys);
- } else if (OverloadCvtInt) {
- // Determine the types of this overloaded AArch64 intrinsic
- SmallVector<llvm::Type *, 2> Tys;
- const Expr *Arg = E->getArg(E->getNumArgs()-1);
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
- Tys.push_back(VTy);
- Ty = CGF.ConvertType(Arg->getType());
- VTy = llvm::VectorType::get(Ty, 1);
- Tys.push_back(VTy);
+ for (unsigned vi = 0; vi != 2; ++vi) {
+ SmallVector<Constant*, 16> Indices;
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+ Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
+ Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
+ }
+ Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+ SV = llvm::ConstantVector::get(Indices);
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
+ SV = Builder.CreateStore(SV, Addr);
+ }
+ return SV;
+ }
+ }
- F = CGF.CGM.getIntrinsic(Int, Tys);
- } else
- F = CGF.CGM.getIntrinsic(Int);
+ assert(Int && "Expected valid intrinsic number");
- Value *Result = CGF.EmitNeonCall(F, Ops, s);
- llvm::Type *ResultType = CGF.ConvertType(E->getType());
+ // Determine the type(s) of this overloaded AArch64 intrinsic.
+ Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
+
+ Value *Result = EmitNeonCall(F, Ops, NameHint);
+ llvm::Type *ResultType = ConvertType(E->getType());
  // Cast the AArch64 intrinsic's one-element vector result back to the
  // scalar type the builtin expects.
- return CGF.Builder.CreateBitCast(Result, ResultType, s);
+ return Builder.CreateBitCast(Result, ResultType, NameHint);
}
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
const CmpInst::Predicate Ip, const Twine &Name) {
- llvm::Type *OTy = ((llvm::User *)Op)->getOperand(0)->getType();
- if (OTy->isPointerTy())
- OTy = Ty;
+ llvm::Type *OTy = Op->getType();
+
+ // FIXME: this is utterly horrific. We should not be looking at previous
+ // codegen context to find out what needs doing. Unfortunately TableGen
+ // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
+ // (etc).
+ if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
+ OTy = BI->getOperand(0)->getType();
+
Op = Builder.CreateBitCast(Op, OTy);
- if (((llvm::VectorType *)OTy)->getElementType()->isFloatingPointTy()) {
- Op = Builder.CreateFCmp(Fp, Op, ConstantAggregateZero::get(OTy));
+ if (OTy->getScalarType()->isFloatingPointTy()) {
+ Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
} else {
- Op = Builder.CreateICmp(Ip, Op, ConstantAggregateZero::get(OTy));
+ Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
}
- return Builder.CreateZExt(Op, Ty, Name);
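+  // NEON comparisons produce all-ones lanes for true, so sign-extend the i1
+  // result rather than zero-extending it.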
+ return Builder.CreateSExt(Op, Ty, Name);
}
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
@@ -2681,13 +3526,9 @@
ZeroTbl, SV, Name));
}
- TblTy = llvm::VectorType::get(TblTy->getElementType(),
- 2*TblTy->getNumElements());
- llvm::Type *Tys[2] = { ResTy, TblTy };
-
Function *TblF;
TblOps.push_back(IndexOp);
- TblF = CGF.CGM.getIntrinsic(IntID, Tys);
+ TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
return CGF.EmitNeonCall(TblF, TblOps, Name);
}
@@ -2698,37 +3539,33 @@
unsigned int Int = 0;
const char *s = NULL;
- unsigned TblPos;
switch (BuiltinID) {
default:
return 0;
- case AArch64::BI__builtin_neon_vtbl1_v:
- case AArch64::BI__builtin_neon_vqtbl1_v:
- case AArch64::BI__builtin_neon_vqtbl1q_v:
- case AArch64::BI__builtin_neon_vtbl2_v:
- case AArch64::BI__builtin_neon_vqtbl2_v:
- case AArch64::BI__builtin_neon_vqtbl2q_v:
- case AArch64::BI__builtin_neon_vtbl3_v:
- case AArch64::BI__builtin_neon_vqtbl3_v:
- case AArch64::BI__builtin_neon_vqtbl3q_v:
- case AArch64::BI__builtin_neon_vtbl4_v:
- case AArch64::BI__builtin_neon_vqtbl4_v:
- case AArch64::BI__builtin_neon_vqtbl4q_v:
- TblPos = 0;
- break;
- case AArch64::BI__builtin_neon_vtbx1_v:
- case AArch64::BI__builtin_neon_vqtbx1_v:
- case AArch64::BI__builtin_neon_vqtbx1q_v:
- case AArch64::BI__builtin_neon_vtbx2_v:
- case AArch64::BI__builtin_neon_vqtbx2_v:
- case AArch64::BI__builtin_neon_vqtbx2q_v:
- case AArch64::BI__builtin_neon_vtbx3_v:
- case AArch64::BI__builtin_neon_vqtbx3_v:
- case AArch64::BI__builtin_neon_vqtbx3q_v:
- case AArch64::BI__builtin_neon_vtbx4_v:
- case AArch64::BI__builtin_neon_vqtbx4_v:
- case AArch64::BI__builtin_neon_vqtbx4q_v:
- TblPos = 1;
+ case NEON::BI__builtin_neon_vtbl1_v:
+ case NEON::BI__builtin_neon_vqtbl1_v:
+ case NEON::BI__builtin_neon_vqtbl1q_v:
+ case NEON::BI__builtin_neon_vtbl2_v:
+ case NEON::BI__builtin_neon_vqtbl2_v:
+ case NEON::BI__builtin_neon_vqtbl2q_v:
+ case NEON::BI__builtin_neon_vtbl3_v:
+ case NEON::BI__builtin_neon_vqtbl3_v:
+ case NEON::BI__builtin_neon_vqtbl3q_v:
+ case NEON::BI__builtin_neon_vtbl4_v:
+ case NEON::BI__builtin_neon_vqtbl4_v:
+ case NEON::BI__builtin_neon_vqtbl4q_v:
+ case NEON::BI__builtin_neon_vtbx1_v:
+ case NEON::BI__builtin_neon_vqtbx1_v:
+ case NEON::BI__builtin_neon_vqtbx1q_v:
+ case NEON::BI__builtin_neon_vtbx2_v:
+ case NEON::BI__builtin_neon_vqtbx2_v:
+ case NEON::BI__builtin_neon_vqtbx2q_v:
+ case NEON::BI__builtin_neon_vtbx3_v:
+ case NEON::BI__builtin_neon_vqtbx3_v:
+ case NEON::BI__builtin_neon_vqtbx3q_v:
+ case NEON::BI__builtin_neon_vtbx4_v:
+ case NEON::BI__builtin_neon_vqtbx4_v:
+ case NEON::BI__builtin_neon_vqtbx4q_v:
break;
}
@@ -2752,35 +3589,31 @@
Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
}
- Arg = E->getArg(TblPos);
- llvm::Type *TblTy = CGF.ConvertType(Arg->getType());
- llvm::VectorType *VTblTy = cast<llvm::VectorType>(TblTy);
- llvm::Type *Tys[2] = { Ty, VTblTy };
unsigned nElts = VTy->getNumElements();
  // AArch64 scalar builtins are not overloaded; they do not have an extra
  // argument that specifies the vector type, so we need to handle each case.
SmallVector<Value *, 2> TblOps;
switch (BuiltinID) {
- case AArch64::BI__builtin_neon_vtbl1_v: {
+ case NEON::BI__builtin_neon_vtbl1_v: {
TblOps.push_back(Ops[0]);
return packTBLDVectorList(CGF, TblOps, 0, Ops[1], Ty,
Intrinsic::aarch64_neon_vtbl1, "vtbl1");
}
- case AArch64::BI__builtin_neon_vtbl2_v: {
+ case NEON::BI__builtin_neon_vtbl2_v: {
TblOps.push_back(Ops[0]);
TblOps.push_back(Ops[1]);
return packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
Intrinsic::aarch64_neon_vtbl1, "vtbl1");
}
- case AArch64::BI__builtin_neon_vtbl3_v: {
+ case NEON::BI__builtin_neon_vtbl3_v: {
TblOps.push_back(Ops[0]);
TblOps.push_back(Ops[1]);
TblOps.push_back(Ops[2]);
return packTBLDVectorList(CGF, TblOps, 0, Ops[3], Ty,
Intrinsic::aarch64_neon_vtbl2, "vtbl2");
}
- case AArch64::BI__builtin_neon_vtbl4_v: {
+ case NEON::BI__builtin_neon_vtbl4_v: {
TblOps.push_back(Ops[0]);
TblOps.push_back(Ops[1]);
TblOps.push_back(Ops[2]);
@@ -2788,7 +3621,7 @@
return packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
Intrinsic::aarch64_neon_vtbl2, "vtbl2");
}
- case AArch64::BI__builtin_neon_vtbx1_v: {
+ case NEON::BI__builtin_neon_vtbx1_v: {
TblOps.push_back(Ops[1]);
Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
Intrinsic::aarch64_neon_vtbl1, "vtbl1");
@@ -2805,13 +3638,13 @@
Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
}
- case AArch64::BI__builtin_neon_vtbx2_v: {
+ case NEON::BI__builtin_neon_vtbx2_v: {
TblOps.push_back(Ops[1]);
TblOps.push_back(Ops[2]);
return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty,
Intrinsic::aarch64_neon_vtbx1, "vtbx1");
}
- case AArch64::BI__builtin_neon_vtbx3_v: {
+ case NEON::BI__builtin_neon_vtbx3_v: {
TblOps.push_back(Ops[1]);
TblOps.push_back(Ops[2]);
TblOps.push_back(Ops[3]);
@@ -2831,7 +3664,7 @@
Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
}
- case AArch64::BI__builtin_neon_vtbx4_v: {
+ case NEON::BI__builtin_neon_vtbx4_v: {
TblOps.push_back(Ops[1]);
TblOps.push_back(Ops[2]);
TblOps.push_back(Ops[3]);
@@ -2839,29 +3672,29 @@
return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty,
Intrinsic::aarch64_neon_vtbx2, "vtbx2");
}
- case AArch64::BI__builtin_neon_vqtbl1_v:
- case AArch64::BI__builtin_neon_vqtbl1q_v:
+ case NEON::BI__builtin_neon_vqtbl1_v:
+ case NEON::BI__builtin_neon_vqtbl1q_v:
Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break;
- case AArch64::BI__builtin_neon_vqtbl2_v:
- case AArch64::BI__builtin_neon_vqtbl2q_v: {
+ case NEON::BI__builtin_neon_vqtbl2_v:
+ case NEON::BI__builtin_neon_vqtbl2q_v: {
Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break;
- case AArch64::BI__builtin_neon_vqtbl3_v:
- case AArch64::BI__builtin_neon_vqtbl3q_v:
+ case NEON::BI__builtin_neon_vqtbl3_v:
+ case NEON::BI__builtin_neon_vqtbl3q_v:
Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break;
- case AArch64::BI__builtin_neon_vqtbl4_v:
- case AArch64::BI__builtin_neon_vqtbl4q_v:
+ case NEON::BI__builtin_neon_vqtbl4_v:
+ case NEON::BI__builtin_neon_vqtbl4q_v:
Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break;
- case AArch64::BI__builtin_neon_vqtbx1_v:
- case AArch64::BI__builtin_neon_vqtbx1q_v:
+ case NEON::BI__builtin_neon_vqtbx1_v:
+ case NEON::BI__builtin_neon_vqtbx1q_v:
Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break;
- case AArch64::BI__builtin_neon_vqtbx2_v:
- case AArch64::BI__builtin_neon_vqtbx2q_v:
+ case NEON::BI__builtin_neon_vqtbx2_v:
+ case NEON::BI__builtin_neon_vqtbx2q_v:
Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break;
- case AArch64::BI__builtin_neon_vqtbx3_v:
- case AArch64::BI__builtin_neon_vqtbx3q_v:
+ case NEON::BI__builtin_neon_vqtbx3_v:
+ case NEON::BI__builtin_neon_vqtbx3q_v:
Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break;
- case AArch64::BI__builtin_neon_vqtbx4_v:
- case AArch64::BI__builtin_neon_vqtbx4q_v:
+ case NEON::BI__builtin_neon_vqtbx4_v:
+ case NEON::BI__builtin_neon_vqtbx4q_v:
Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break;
}
}
@@ -2869,15 +3702,23 @@
if (!Int)
return 0;
- Function *F = CGF.CGM.getIntrinsic(Int, Tys);
+ Function *F = CGF.CGM.getIntrinsic(Int, Ty);
return CGF.EmitNeonCall(F, Ops, s);
}
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
+
// Process AArch64 scalar builtins
- if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
+ llvm::ArrayRef<NeonIntrinsicInfo> SISDInfo(AArch64SISDIntrinsicInfo);
+ const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+ SISDInfo, BuiltinID, AArch64SISDIntrinsicInfoProvenSorted);
+
+ if (Builtin) {
+ Value *Result = EmitAArch64ScalarBuiltinExpr(*this, *Builtin, E);
+ assert(Result && "SISD intrinsic should have been handled");
return Result;
+ }
// Process AArch64 table lookup builtins
if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E))
@@ -2899,20 +3740,60 @@
SmallVector<Value *, 4> Ops;
llvm::Value *Align = 0; // Alignment for load/store
+
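+  // vldrq_p128/vstrq_p128 do not carry the usual NEON type-code argument, so
+  // handle them before the generic path: go through an fp128 pointer and
+  // bitcast the value to/from i128.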
+ if (BuiltinID == NEON::BI__builtin_neon_vldrq_p128) {
+ Value *Op = EmitScalarExpr(E->getArg(0));
+ unsigned addressSpace =
+ cast<llvm::PointerType>(Op->getType())->getAddressSpace();
+ llvm::Type *Ty = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace);
+ Op = Builder.CreateBitCast(Op, Ty);
+ Op = Builder.CreateLoad(Op);
+ Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
+ return Builder.CreateBitCast(Op, Ty);
+ }
+ if (BuiltinID == NEON::BI__builtin_neon_vstrq_p128) {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ unsigned addressSpace =
+ cast<llvm::PointerType>(Op0->getType())->getAddressSpace();
+ llvm::Type *PTy = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace);
+ Op0 = Builder.CreateBitCast(Op0, PTy);
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ llvm::Type *Ty = llvm::Type::getFP128Ty(getLLVMContext());
+ Op1 = Builder.CreateBitCast(Op1, Ty);
+ return Builder.CreateStore(Op1, Op0);
+ }
for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
if (i == 0) {
switch (BuiltinID) {
- case AArch64::BI__builtin_neon_vst1_x2_v:
- case AArch64::BI__builtin_neon_vst1q_x2_v:
- case AArch64::BI__builtin_neon_vst1_x3_v:
- case AArch64::BI__builtin_neon_vst1q_x3_v:
- case AArch64::BI__builtin_neon_vst1_x4_v:
- case AArch64::BI__builtin_neon_vst1q_x4_v:
+ case NEON::BI__builtin_neon_vld1_v:
+ case NEON::BI__builtin_neon_vld1q_v:
+ case NEON::BI__builtin_neon_vst1_v:
+ case NEON::BI__builtin_neon_vst1q_v:
+ case NEON::BI__builtin_neon_vst2_v:
+ case NEON::BI__builtin_neon_vst2q_v:
+ case NEON::BI__builtin_neon_vst3_v:
+ case NEON::BI__builtin_neon_vst3q_v:
+ case NEON::BI__builtin_neon_vst4_v:
+ case NEON::BI__builtin_neon_vst4q_v:
+ case NEON::BI__builtin_neon_vst1_x2_v:
+ case NEON::BI__builtin_neon_vst1q_x2_v:
+ case NEON::BI__builtin_neon_vst1_x3_v:
+ case NEON::BI__builtin_neon_vst1q_x3_v:
+ case NEON::BI__builtin_neon_vst1_x4_v:
+ case NEON::BI__builtin_neon_vst1q_x4_v:
  // Handle ld1/st1 lane in this function a little differently from ARM.
- case AArch64::BI__builtin_neon_vld1_lane_v:
- case AArch64::BI__builtin_neon_vld1q_lane_v:
- case AArch64::BI__builtin_neon_vst1_lane_v:
- case AArch64::BI__builtin_neon_vst1q_lane_v:
+ case NEON::BI__builtin_neon_vld1_lane_v:
+ case NEON::BI__builtin_neon_vld1q_lane_v:
+ case NEON::BI__builtin_neon_vst1_lane_v:
+ case NEON::BI__builtin_neon_vst1q_lane_v:
+ case NEON::BI__builtin_neon_vst2_lane_v:
+ case NEON::BI__builtin_neon_vst2q_lane_v:
+ case NEON::BI__builtin_neon_vst3_lane_v:
+ case NEON::BI__builtin_neon_vst3q_lane_v:
+ case NEON::BI__builtin_neon_vst4_lane_v:
+ case NEON::BI__builtin_neon_vst4q_lane_v:
+ case NEON::BI__builtin_neon_vld1_dup_v:
+ case NEON::BI__builtin_neon_vld1q_dup_v:
// Get the alignment for the argument in addition to the value;
// we'll use it later.
std::pair<llvm::Value *, unsigned> Src =
@@ -2924,21 +3805,31 @@
}
if (i == 1) {
switch (BuiltinID) {
- case AArch64::BI__builtin_neon_vld1_x2_v:
- case AArch64::BI__builtin_neon_vld1q_x2_v:
- case AArch64::BI__builtin_neon_vld1_x3_v:
- case AArch64::BI__builtin_neon_vld1q_x3_v:
- case AArch64::BI__builtin_neon_vld1_x4_v:
- case AArch64::BI__builtin_neon_vld1q_x4_v:
+ case NEON::BI__builtin_neon_vld2_v:
+ case NEON::BI__builtin_neon_vld2q_v:
+ case NEON::BI__builtin_neon_vld3_v:
+ case NEON::BI__builtin_neon_vld3q_v:
+ case NEON::BI__builtin_neon_vld4_v:
+ case NEON::BI__builtin_neon_vld4q_v:
+ case NEON::BI__builtin_neon_vld1_x2_v:
+ case NEON::BI__builtin_neon_vld1q_x2_v:
+ case NEON::BI__builtin_neon_vld1_x3_v:
+ case NEON::BI__builtin_neon_vld1q_x3_v:
+ case NEON::BI__builtin_neon_vld1_x4_v:
+ case NEON::BI__builtin_neon_vld1q_x4_v:
// Handle ld1/st1 dup lane in this function a little differently from ARM.
- case AArch64::BI__builtin_neon_vld2_dup_v:
- case AArch64::BI__builtin_neon_vld2q_dup_v:
- case AArch64::BI__builtin_neon_vld3_dup_v:
- case AArch64::BI__builtin_neon_vld3q_dup_v:
- case AArch64::BI__builtin_neon_vld4_dup_v:
- case AArch64::BI__builtin_neon_vld4q_dup_v:
- case AArch64::BI__builtin_neon_vld2_lane_v:
- case AArch64::BI__builtin_neon_vld2q_lane_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v:
+ case NEON::BI__builtin_neon_vld2_lane_v:
+ case NEON::BI__builtin_neon_vld2q_lane_v:
+ case NEON::BI__builtin_neon_vld3_lane_v:
+ case NEON::BI__builtin_neon_vld3q_lane_v:
+ case NEON::BI__builtin_neon_vld4_lane_v:
+ case NEON::BI__builtin_neon_vld4q_lane_v:
// Get the alignment for the argument in addition to the value;
// we'll use it later.
std::pair<llvm::Value *, unsigned> Src =
@@ -2967,6 +3858,17 @@
if (!Ty)
return 0;
+
+ // Many NEON builtins have identical semantics and uses in ARM and
+ // AArch64. Emit these in a single function.
+ llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap);
+ Builtin = findNeonIntrinsicInMap(IntrinsicMap, BuiltinID,
+ NEONSIMDIntrinsicsProvenSorted);
+ if (Builtin)
+ return EmitCommonNeonBuiltinExpr(
+ Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
+ Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align);
+
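The lookup above relies on NeonIntrinsicInfo and findNeonIntrinsicInMap, which are defined earlier in this file and are not visible in this hunk. A hedged sketch of the general shape such a table lookup takes (the names and fields here are illustrative stand-ins, not the actual declarations); NEONSIMDIntrinsicsProvenSorted presumably serves to verify the sort order once per table:

    #include <algorithm>

    struct IntrinsicRecord {          // stand-in for NeonIntrinsicInfo
      unsigned BuiltinID;             // key: table is sorted ascending on this
      unsigned LLVMIntrinsic;         // what to emit for this builtin
      const char *NameHint;
      bool operator<(unsigned RHSID) const { return BuiltinID < RHSID; }
    };

    // Binary search over a table that must be sorted by BuiltinID; returns
    // null so the caller can fall back to the target-specific switch below.
    const IntrinsicRecord *findRecord(const IntrinsicRecord *Begin,
                                      const IntrinsicRecord *End,
                                      unsigned BuiltinID) {
      const IntrinsicRecord *It = std::lower_bound(Begin, End, BuiltinID);
      if (It != End && It->BuiltinID == BuiltinID)
        return It;
      return nullptr;
    }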
unsigned Int;
switch (BuiltinID) {
default:
@@ -2975,208 +3877,30 @@
// AArch64 builtins mapping to legacy ARM v7 builtins.
// FIXME: the mapped builtins listed correspond to what has been tested
// in aarch64-neon-intrinsics.c so far.
- case AArch64::BI__builtin_neon_vuzp_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzp_v, E);
- case AArch64::BI__builtin_neon_vuzpq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzpq_v, E);
- case AArch64::BI__builtin_neon_vzip_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzip_v, E);
- case AArch64::BI__builtin_neon_vzipq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzipq_v, E);
- case AArch64::BI__builtin_neon_vtrn_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrn_v, E);
- case AArch64::BI__builtin_neon_vtrnq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrnq_v, E);
- case AArch64::BI__builtin_neon_vext_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vext_v, E);
- case AArch64::BI__builtin_neon_vextq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vextq_v, E);
- case AArch64::BI__builtin_neon_vmul_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E);
- case AArch64::BI__builtin_neon_vmulq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E);
- case AArch64::BI__builtin_neon_vabd_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E);
- case AArch64::BI__builtin_neon_vabdq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E);
- case AArch64::BI__builtin_neon_vfma_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E);
- case AArch64::BI__builtin_neon_vfmaq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E);
- case AArch64::BI__builtin_neon_vbsl_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E);
- case AArch64::BI__builtin_neon_vbslq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E);
- case AArch64::BI__builtin_neon_vrsqrts_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E);
- case AArch64::BI__builtin_neon_vrsqrtsq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E);
- case AArch64::BI__builtin_neon_vrecps_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E);
- case AArch64::BI__builtin_neon_vrecpsq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E);
- case AArch64::BI__builtin_neon_vcale_v:
- if (VTy->getVectorNumElements() == 1) {
- std::swap(Ops[0], Ops[1]);
- } else {
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E);
- }
- case AArch64::BI__builtin_neon_vcage_v:
- if (VTy->getVectorNumElements() == 1) {
- // Determine the types of this overloaded AArch64 intrinsic
- SmallVector<llvm::Type *, 3> Tys;
- Tys.push_back(VTy);
- VTy = llvm::VectorType::get(DoubleTy, 1);
- Tys.push_back(VTy);
- Tys.push_back(VTy);
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcage, Tys);
- return EmitNeonCall(F, Ops, "vcage");
- }
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E);
- case AArch64::BI__builtin_neon_vcaleq_v:
- std::swap(Ops[0], Ops[1]);
- case AArch64::BI__builtin_neon_vcageq_v: {
- Function *F;
- if (VTy->getElementType()->isIntegerTy(64))
- F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq);
- else
- F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
- return EmitNeonCall(F, Ops, "vcage");
- }
- case AArch64::BI__builtin_neon_vcalt_v:
- if (VTy->getVectorNumElements() == 1) {
- std::swap(Ops[0], Ops[1]);
- } else {
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E);
- }
- case AArch64::BI__builtin_neon_vcagt_v:
- if (VTy->getVectorNumElements() == 1) {
- // Determine the types of this overloaded AArch64 intrinsic
- SmallVector<llvm::Type *, 3> Tys;
- Tys.push_back(VTy);
- VTy = llvm::VectorType::get(DoubleTy, 1);
- Tys.push_back(VTy);
- Tys.push_back(VTy);
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcagt, Tys);
- return EmitNeonCall(F, Ops, "vcagt");
- }
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E);
- case AArch64::BI__builtin_neon_vcaltq_v:
- std::swap(Ops[0], Ops[1]);
- case AArch64::BI__builtin_neon_vcagtq_v: {
- Function *F;
- if (VTy->getElementType()->isIntegerTy(64))
- F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq);
- else
- F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
- return EmitNeonCall(F, Ops, "vcagt");
- }
- case AArch64::BI__builtin_neon_vtst_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E);
- case AArch64::BI__builtin_neon_vtstq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E);
- case AArch64::BI__builtin_neon_vhadd_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E);
- case AArch64::BI__builtin_neon_vhaddq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E);
- case AArch64::BI__builtin_neon_vhsub_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E);
- case AArch64::BI__builtin_neon_vhsubq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E);
- case AArch64::BI__builtin_neon_vrhadd_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E);
- case AArch64::BI__builtin_neon_vrhaddq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E);
- case AArch64::BI__builtin_neon_vqadd_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E);
- case AArch64::BI__builtin_neon_vqaddq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E);
- case AArch64::BI__builtin_neon_vqsub_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E);
- case AArch64::BI__builtin_neon_vqsubq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E);
- case AArch64::BI__builtin_neon_vshl_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E);
- case AArch64::BI__builtin_neon_vshlq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E);
- case AArch64::BI__builtin_neon_vqshl_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E);
- case AArch64::BI__builtin_neon_vqshlq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E);
- case AArch64::BI__builtin_neon_vrshl_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E);
- case AArch64::BI__builtin_neon_vrshlq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E);
- case AArch64::BI__builtin_neon_vqrshl_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
- case AArch64::BI__builtin_neon_vqrshlq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
- case AArch64::BI__builtin_neon_vaddhn_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E);
- case AArch64::BI__builtin_neon_vraddhn_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E);
- case AArch64::BI__builtin_neon_vsubhn_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E);
- case AArch64::BI__builtin_neon_vrsubhn_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E);
- case AArch64::BI__builtin_neon_vmull_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E);
- case AArch64::BI__builtin_neon_vqdmull_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E);
- case AArch64::BI__builtin_neon_vqdmlal_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E);
- case AArch64::BI__builtin_neon_vqdmlsl_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E);
- case AArch64::BI__builtin_neon_vmax_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
- case AArch64::BI__builtin_neon_vmaxq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E);
- case AArch64::BI__builtin_neon_vmin_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E);
- case AArch64::BI__builtin_neon_vminq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E);
- case AArch64::BI__builtin_neon_vpmax_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E);
- case AArch64::BI__builtin_neon_vpmin_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E);
- case AArch64::BI__builtin_neon_vpadd_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E);
- case AArch64::BI__builtin_neon_vqdmulh_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E);
- case AArch64::BI__builtin_neon_vqdmulhq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E);
- case AArch64::BI__builtin_neon_vqrdmulh_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E);
- case AArch64::BI__builtin_neon_vqrdmulhq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E);
// Shift by immediate
- case AArch64::BI__builtin_neon_vshr_n_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshr_n_v, E);
- case AArch64::BI__builtin_neon_vshrq_n_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshrq_n_v, E);
- case AArch64::BI__builtin_neon_vrshr_n_v:
- case AArch64::BI__builtin_neon_vrshrq_n_v:
+ case NEON::BI__builtin_neon_vrshr_n_v:
+ case NEON::BI__builtin_neon_vrshrq_n_v:
Int = usgn ? Intrinsic::aarch64_neon_vurshr
: Intrinsic::aarch64_neon_vsrshr;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n");
- case AArch64::BI__builtin_neon_vsra_n_v:
+ case NEON::BI__builtin_neon_vsra_n_v:
if (VTy->getElementType()->isIntegerTy(64)) {
Int = usgn ? Intrinsic::aarch64_neon_vsradu_n
: Intrinsic::aarch64_neon_vsrads_n;
return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vsra_n");
}
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsra_n_v, E);
- case AArch64::BI__builtin_neon_vsraq_n_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsraq_n_v, E);
- case AArch64::BI__builtin_neon_vrsra_n_v:
+ return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsra_n_v, E);
+ case NEON::BI__builtin_neon_vsraq_n_v:
+ return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsraq_n_v, E);
+ case NEON::BI__builtin_neon_vrsra_n_v:
if (VTy->getElementType()->isIntegerTy(64)) {
Int = usgn ? Intrinsic::aarch64_neon_vrsradu_n
: Intrinsic::aarch64_neon_vrsrads_n;
return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vrsra_n");
}
// fall through
- case AArch64::BI__builtin_neon_vrsraq_n_v: {
+ case NEON::BI__builtin_neon_vrsraq_n_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Int = usgn ? Intrinsic::aarch64_neon_vurshr
@@ -3184,73 +3908,39 @@
Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
}
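As a usage reference for the rounding shift-and-accumulate path above (the 64-bit x 1 case takes the dedicated scalar intrinsic; everything else is a rounding shift followed by an add), a small example, assuming an AArch64 target:

    #include <arm_neon.h>

    int32x4_t accumulate(int32x4_t acc, int32x4_t v) {
      // Lowers to a rounding shift right by 2 plus an integer add, matching
      // the CreateCall2 + CreateAdd sequence emitted above for vrsraq_n_v.
      return vrsraq_n_s32(acc, v, 2);
    }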
- case AArch64::BI__builtin_neon_vshl_n_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E);
- case AArch64::BI__builtin_neon_vshlq_n_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E);
- case AArch64::BI__builtin_neon_vqshl_n_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_n_v, E);
- case AArch64::BI__builtin_neon_vqshlq_n_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_n_v, E);
- case AArch64::BI__builtin_neon_vqshlu_n_v:
- case AArch64::BI__builtin_neon_vqshluq_n_v:
+ case NEON::BI__builtin_neon_vqshlu_n_v:
+ case NEON::BI__builtin_neon_vqshluq_n_v:
Int = Intrinsic::aarch64_neon_vsqshlu;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n");
- case AArch64::BI__builtin_neon_vsri_n_v:
- case AArch64::BI__builtin_neon_vsriq_n_v:
+ case NEON::BI__builtin_neon_vsri_n_v:
+ case NEON::BI__builtin_neon_vsriq_n_v:
Int = Intrinsic::aarch64_neon_vsri;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n");
- case AArch64::BI__builtin_neon_vsli_n_v:
- case AArch64::BI__builtin_neon_vsliq_n_v:
+ case NEON::BI__builtin_neon_vsli_n_v:
+ case NEON::BI__builtin_neon_vsliq_n_v:
Int = Intrinsic::aarch64_neon_vsli;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n");
- case AArch64::BI__builtin_neon_vshll_n_v: {
- llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
- Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
- if (usgn)
- Ops[0] = Builder.CreateZExt(Ops[0], VTy);
- else
- Ops[0] = Builder.CreateSExt(Ops[0], VTy);
- Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
- return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
- }
- case AArch64::BI__builtin_neon_vshrn_n_v: {
- llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
- Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
- Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
- if (usgn)
- Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
- else
- Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
- return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
- }
- case AArch64::BI__builtin_neon_vqshrun_n_v:
+ case NEON::BI__builtin_neon_vqshrun_n_v:
Int = Intrinsic::aarch64_neon_vsqshrun;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
- case AArch64::BI__builtin_neon_vrshrn_n_v:
+ case NEON::BI__builtin_neon_vrshrn_n_v:
Int = Intrinsic::aarch64_neon_vrshrn;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
- case AArch64::BI__builtin_neon_vqrshrun_n_v:
+ case NEON::BI__builtin_neon_vqrshrun_n_v:
Int = Intrinsic::aarch64_neon_vsqrshrun;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
- case AArch64::BI__builtin_neon_vqshrn_n_v:
+ case NEON::BI__builtin_neon_vqshrn_n_v:
Int = usgn ? Intrinsic::aarch64_neon_vuqshrn
: Intrinsic::aarch64_neon_vsqshrn;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
- case AArch64::BI__builtin_neon_vqrshrn_n_v:
+ case NEON::BI__builtin_neon_vqrshrn_n_v:
Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn
: Intrinsic::aarch64_neon_vsqrshrn;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
// Convert
- case AArch64::BI__builtin_neon_vmovl_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E);
- case AArch64::BI__builtin_neon_vcvt_n_f32_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E);
- case AArch64::BI__builtin_neon_vcvtq_n_f32_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E);
- case AArch64::BI__builtin_neon_vcvt_n_f64_v:
- case AArch64::BI__builtin_neon_vcvtq_n_f64_v: {
+ case NEON::BI__builtin_neon_vcvt_n_f64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
llvm::Type *FloatTy =
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
llvm::Type *Tys[2] = { FloatTy, Ty };
@@ -3259,78 +3949,26 @@
Function *F = CGM.getIntrinsic(Int, Tys);
return EmitNeonCall(F, Ops, "vcvt_n");
}
- case AArch64::BI__builtin_neon_vcvt_n_s32_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_s32_v, E);
- case AArch64::BI__builtin_neon_vcvtq_n_s32_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_s32_v, E);
- case AArch64::BI__builtin_neon_vcvt_n_u32_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E);
- case AArch64::BI__builtin_neon_vcvtq_n_u32_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E);
- case AArch64::BI__builtin_neon_vcvt_n_s64_v:
- case AArch64::BI__builtin_neon_vcvt_n_u64_v:
- case AArch64::BI__builtin_neon_vcvtq_n_s64_v:
- case AArch64::BI__builtin_neon_vcvtq_n_u64_v: {
- llvm::Type *FloatTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
- llvm::Type *Tys[2] = { Ty, FloatTy };
- Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
- : Intrinsic::arm_neon_vcvtfp2fxs;
- Function *F = CGM.getIntrinsic(Int, Tys);
- return EmitNeonCall(F, Ops, "vcvt_n");
- }
// Load/Store
- case AArch64::BI__builtin_neon_vld1_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_v, E);
- case AArch64::BI__builtin_neon_vld1q_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_v, E);
- case AArch64::BI__builtin_neon_vld2_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2_v, E);
- case AArch64::BI__builtin_neon_vld2q_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_v, E);
- case AArch64::BI__builtin_neon_vld3_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_v, E);
- case AArch64::BI__builtin_neon_vld3q_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_v, E);
- case AArch64::BI__builtin_neon_vld4_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_v, E);
- case AArch64::BI__builtin_neon_vld4q_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_v, E);
- case AArch64::BI__builtin_neon_vst1_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1_v, E);
- case AArch64::BI__builtin_neon_vst1q_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1q_v, E);
- case AArch64::BI__builtin_neon_vst2_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_v, E);
- case AArch64::BI__builtin_neon_vst2q_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_v, E);
- case AArch64::BI__builtin_neon_vst3_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_v, E);
- case AArch64::BI__builtin_neon_vst3q_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_v, E);
- case AArch64::BI__builtin_neon_vst4_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E);
- case AArch64::BI__builtin_neon_vst4q_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
- case AArch64::BI__builtin_neon_vld1_x2_v:
- case AArch64::BI__builtin_neon_vld1q_x2_v:
- case AArch64::BI__builtin_neon_vld1_x3_v:
- case AArch64::BI__builtin_neon_vld1q_x3_v:
- case AArch64::BI__builtin_neon_vld1_x4_v:
- case AArch64::BI__builtin_neon_vld1q_x4_v: {
+ case NEON::BI__builtin_neon_vld1_x2_v:
+ case NEON::BI__builtin_neon_vld1q_x2_v:
+ case NEON::BI__builtin_neon_vld1_x3_v:
+ case NEON::BI__builtin_neon_vld1q_x3_v:
+ case NEON::BI__builtin_neon_vld1_x4_v:
+ case NEON::BI__builtin_neon_vld1q_x4_v: {
unsigned Int;
switch (BuiltinID) {
- case AArch64::BI__builtin_neon_vld1_x2_v:
- case AArch64::BI__builtin_neon_vld1q_x2_v:
+ case NEON::BI__builtin_neon_vld1_x2_v:
+ case NEON::BI__builtin_neon_vld1q_x2_v:
Int = Intrinsic::aarch64_neon_vld1x2;
break;
- case AArch64::BI__builtin_neon_vld1_x3_v:
- case AArch64::BI__builtin_neon_vld1q_x3_v:
+ case NEON::BI__builtin_neon_vld1_x3_v:
+ case NEON::BI__builtin_neon_vld1q_x3_v:
Int = Intrinsic::aarch64_neon_vld1x3;
break;
- case AArch64::BI__builtin_neon_vld1_x4_v:
- case AArch64::BI__builtin_neon_vld1q_x4_v:
+ case NEON::BI__builtin_neon_vld1_x4_v:
+ case NEON::BI__builtin_neon_vld1q_x4_v:
Int = Intrinsic::aarch64_neon_vld1x4;
break;
}
@@ -3340,32 +3978,32 @@
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateStore(Ops[1], Ops[0]);
}
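The vld1_xN builtins above return a structure of N vectors; the selected LLVM intrinsic produces the aggregate, which is then stored back through the pointer operand as shown. A usage sketch, assuming an ARMv8 target where the x2/x3/x4 forms are available:

    #include <arm_neon.h>

    int32x2x3_t load_three(const int32_t *p) {
      // Maps to NEON::BI__builtin_neon_vld1_x3_v and, per the switch above,
      // to the aarch64_neon_vld1x3 LLVM intrinsic.
      return vld1_s32_x3(p);
    }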
- case AArch64::BI__builtin_neon_vst1_x2_v:
- case AArch64::BI__builtin_neon_vst1q_x2_v:
- case AArch64::BI__builtin_neon_vst1_x3_v:
- case AArch64::BI__builtin_neon_vst1q_x3_v:
- case AArch64::BI__builtin_neon_vst1_x4_v:
- case AArch64::BI__builtin_neon_vst1q_x4_v: {
+ case NEON::BI__builtin_neon_vst1_x2_v:
+ case NEON::BI__builtin_neon_vst1q_x2_v:
+ case NEON::BI__builtin_neon_vst1_x3_v:
+ case NEON::BI__builtin_neon_vst1q_x3_v:
+ case NEON::BI__builtin_neon_vst1_x4_v:
+ case NEON::BI__builtin_neon_vst1q_x4_v: {
Ops.push_back(Align);
unsigned Int;
switch (BuiltinID) {
- case AArch64::BI__builtin_neon_vst1_x2_v:
- case AArch64::BI__builtin_neon_vst1q_x2_v:
+ case NEON::BI__builtin_neon_vst1_x2_v:
+ case NEON::BI__builtin_neon_vst1q_x2_v:
Int = Intrinsic::aarch64_neon_vst1x2;
break;
- case AArch64::BI__builtin_neon_vst1_x3_v:
- case AArch64::BI__builtin_neon_vst1q_x3_v:
+ case NEON::BI__builtin_neon_vst1_x3_v:
+ case NEON::BI__builtin_neon_vst1q_x3_v:
Int = Intrinsic::aarch64_neon_vst1x3;
break;
- case AArch64::BI__builtin_neon_vst1_x4_v:
- case AArch64::BI__builtin_neon_vst1q_x4_v:
+ case NEON::BI__builtin_neon_vst1_x4_v:
+ case NEON::BI__builtin_neon_vst1q_x4_v:
Int = Intrinsic::aarch64_neon_vst1x4;
break;
}
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
}
- case AArch64::BI__builtin_neon_vld1_lane_v:
- case AArch64::BI__builtin_neon_vld1q_lane_v: {
+ case NEON::BI__builtin_neon_vld1_lane_v:
+ case NEON::BI__builtin_neon_vld1q_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ty = llvm::PointerType::getUnqual(VTy->getElementType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
@@ -3373,20 +4011,8 @@
Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
}
- case AArch64::BI__builtin_neon_vld2_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E);
- case AArch64::BI__builtin_neon_vld2q_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E);
- case AArch64::BI__builtin_neon_vld3_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_lane_v, E);
- case AArch64::BI__builtin_neon_vld3q_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_lane_v, E);
- case AArch64::BI__builtin_neon_vld4_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_lane_v, E);
- case AArch64::BI__builtin_neon_vld4q_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_lane_v, E);
- case AArch64::BI__builtin_neon_vst1_lane_v:
- case AArch64::BI__builtin_neon_vst1q_lane_v: {
+ case NEON::BI__builtin_neon_vst1_lane_v:
+ case NEON::BI__builtin_neon_vst1q_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
@@ -3395,39 +4021,23 @@
St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
return St;
}
- case AArch64::BI__builtin_neon_vst2_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_lane_v, E);
- case AArch64::BI__builtin_neon_vst2q_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_lane_v, E);
- case AArch64::BI__builtin_neon_vst3_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_lane_v, E);
- case AArch64::BI__builtin_neon_vst3q_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_lane_v, E);
- case AArch64::BI__builtin_neon_vst4_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_lane_v, E);
- case AArch64::BI__builtin_neon_vst4q_lane_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_lane_v, E);
- case AArch64::BI__builtin_neon_vld1_dup_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_dup_v, E);
- case AArch64::BI__builtin_neon_vld1q_dup_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_dup_v, E);
- case AArch64::BI__builtin_neon_vld2_dup_v:
- case AArch64::BI__builtin_neon_vld2q_dup_v:
- case AArch64::BI__builtin_neon_vld3_dup_v:
- case AArch64::BI__builtin_neon_vld3q_dup_v:
- case AArch64::BI__builtin_neon_vld4_dup_v:
- case AArch64::BI__builtin_neon_vld4q_dup_v: {
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v: {
// Handle 64-bit x 1 elements as a special-case. There is no "dup" needed.
if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 &&
VTy->getNumElements() == 1) {
switch (BuiltinID) {
- case AArch64::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
Int = Intrinsic::arm_neon_vld2;
break;
- case AArch64::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
Int = Intrinsic::arm_neon_vld3;
break;
- case AArch64::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
Int = Intrinsic::arm_neon_vld4;
break;
default:
@@ -3440,16 +4050,16 @@
return Builder.CreateStore(Ops[1], Ops[0]);
}
switch (BuiltinID) {
- case AArch64::BI__builtin_neon_vld2_dup_v:
- case AArch64::BI__builtin_neon_vld2q_dup_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v:
Int = Intrinsic::arm_neon_vld2lane;
break;
- case AArch64::BI__builtin_neon_vld3_dup_v:
- case AArch64::BI__builtin_neon_vld3q_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v:
Int = Intrinsic::arm_neon_vld3lane;
break;
- case AArch64::BI__builtin_neon_vld4_dup_v:
- case AArch64::BI__builtin_neon_vld4q_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v:
Int = Intrinsic::arm_neon_vld4lane;
break;
}
@@ -3478,42 +4088,11 @@
return Builder.CreateStore(Ops[1], Ops[0]);
}
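The dup variants above are emitted via the arm_neon_vldNlane intrinsics selected in the switch (with the loaded lane presumably splatted across the result in the part of this case not shown in the hunk); the 64-bit x 1 shape is short-circuited to a plain vldN, since a one-element vector needs no duplication. A usage sketch, assuming an ARMv8 target:

    #include <arm_neon.h>

    int32x2x2_t dup_pair(const int32_t *p) {
      // Loads one {int32, int32} pair and replicates it across the lanes of
      // each result vector.
      return vld2_dup_s32(p);
    }

    int64x1x2_t dup_pair64(const int64_t *p) {
      // 64-bit x 1 special case above: no dup needed, emitted as a plain vld2.
      return vld2_dup_s64(p);
    }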
- // Crypto
- case AArch64::BI__builtin_neon_vaeseq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aese, Ty),
- Ops, "aese");
- case AArch64::BI__builtin_neon_vaesdq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesd, Ty),
- Ops, "aesd");
- case AArch64::BI__builtin_neon_vaesmcq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesmc, Ty),
- Ops, "aesmc");
- case AArch64::BI__builtin_neon_vaesimcq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesimc, Ty),
- Ops, "aesimc");
- case AArch64::BI__builtin_neon_vsha1su1q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su1, Ty),
- Ops, "sha1su1");
- case AArch64::BI__builtin_neon_vsha256su0q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su0, Ty),
- Ops, "sha256su0");
- case AArch64::BI__builtin_neon_vsha1su0q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su0, Ty),
- Ops, "sha1su0");
- case AArch64::BI__builtin_neon_vsha256hq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h, Ty),
- Ops, "sha256h");
- case AArch64::BI__builtin_neon_vsha256h2q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h2, Ty),
- Ops, "sha256h2");
- case AArch64::BI__builtin_neon_vsha256su1q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1, Ty),
- Ops, "sha256su1");
- case AArch64::BI__builtin_neon_vmul_lane_v:
- case AArch64::BI__builtin_neon_vmul_laneq_v: {
+ case NEON::BI__builtin_neon_vmul_lane_v:
+ case NEON::BI__builtin_neon_vmul_laneq_v: {
// v1f64 vmul_lane should be mapped to Neon scalar mul lane
bool Quad = false;
- if (BuiltinID == AArch64::BI__builtin_neon_vmul_laneq_v)
+ if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
Quad = true;
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
@@ -3525,7 +4104,7 @@
}
// AArch64-only builtins
- case AArch64::BI__builtin_neon_vfmaq_laneq_v: {
+ case NEON::BI__builtin_neon_vfmaq_laneq_v: {
Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
@@ -3534,7 +4113,7 @@
Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
}
- case AArch64::BI__builtin_neon_vfmaq_lane_v: {
+ case NEON::BI__builtin_neon_vfmaq_lane_v: {
Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
@@ -3549,7 +4128,7 @@
return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
}
- case AArch64::BI__builtin_neon_vfma_lane_v: {
+ case NEON::BI__builtin_neon_vfma_lane_v: {
llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
// v1f64 fma should be mapped to Neon scalar f64 fma
if (VTy && VTy->getElementType() == DoubleTy) {
@@ -3571,7 +4150,7 @@
Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
}
- case AArch64::BI__builtin_neon_vfma_laneq_v: {
+ case NEON::BI__builtin_neon_vfma_laneq_v: {
llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
// v1f64 fma should be mapped to Neon scalar f64 fma
if (VTy && VTy->getElementType() == DoubleTy) {
@@ -3598,8 +4177,8 @@
return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
}
- case AArch64::BI__builtin_neon_vfms_v:
- case AArch64::BI__builtin_neon_vfmsq_v: {
+ case NEON::BI__builtin_neon_vfms_v:
+ case NEON::BI__builtin_neon_vfmsq_v: {
Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
@@ -3610,314 +4189,136 @@
// AArch64 intrinsic has it first.
return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
}
- case AArch64::BI__builtin_neon_vmaxnm_v:
- case AArch64::BI__builtin_neon_vmaxnmq_v: {
+ case NEON::BI__builtin_neon_vmaxnm_v:
+ case NEON::BI__builtin_neon_vmaxnmq_v: {
Int = Intrinsic::aarch64_neon_vmaxnm;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
}
- case AArch64::BI__builtin_neon_vminnm_v:
- case AArch64::BI__builtin_neon_vminnmq_v: {
+ case NEON::BI__builtin_neon_vminnm_v:
+ case NEON::BI__builtin_neon_vminnmq_v: {
Int = Intrinsic::aarch64_neon_vminnm;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
}
- case AArch64::BI__builtin_neon_vpmaxnm_v:
- case AArch64::BI__builtin_neon_vpmaxnmq_v: {
+ case NEON::BI__builtin_neon_vpmaxnm_v:
+ case NEON::BI__builtin_neon_vpmaxnmq_v: {
Int = Intrinsic::aarch64_neon_vpmaxnm;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
}
- case AArch64::BI__builtin_neon_vpminnm_v:
- case AArch64::BI__builtin_neon_vpminnmq_v: {
+ case NEON::BI__builtin_neon_vpminnm_v:
+ case NEON::BI__builtin_neon_vpminnmq_v: {
Int = Intrinsic::aarch64_neon_vpminnm;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
}
- case AArch64::BI__builtin_neon_vpmaxq_v: {
+ case NEON::BI__builtin_neon_vpmaxq_v: {
Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
}
- case AArch64::BI__builtin_neon_vpminq_v: {
+ case NEON::BI__builtin_neon_vpminq_v: {
Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
}
- case AArch64::BI__builtin_neon_vpaddq_v: {
- Int = Intrinsic::arm_neon_vpadd;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd");
- }
- case AArch64::BI__builtin_neon_vmulx_v:
- case AArch64::BI__builtin_neon_vmulxq_v: {
+ case NEON::BI__builtin_neon_vmulx_v:
+ case NEON::BI__builtin_neon_vmulxq_v: {
Int = Intrinsic::aarch64_neon_vmulx;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
}
- case AArch64::BI__builtin_neon_vpaddl_v:
- case AArch64::BI__builtin_neon_vpaddlq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpaddl_v, E);
- case AArch64::BI__builtin_neon_vpadal_v:
- case AArch64::BI__builtin_neon_vpadalq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadal_v, E);
- case AArch64::BI__builtin_neon_vqabs_v:
- case AArch64::BI__builtin_neon_vqabsq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqabs_v, E);
- case AArch64::BI__builtin_neon_vqneg_v:
- case AArch64::BI__builtin_neon_vqnegq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqneg_v, E);
- case AArch64::BI__builtin_neon_vabs_v:
- case AArch64::BI__builtin_neon_vabsq_v: {
- if (VTy->getElementType()->isFloatingPointTy()) {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
- }
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabs_v, E);
- }
- case AArch64::BI__builtin_neon_vsqadd_v:
- case AArch64::BI__builtin_neon_vsqaddq_v: {
+ case NEON::BI__builtin_neon_vsqadd_v:
+ case NEON::BI__builtin_neon_vsqaddq_v: {
Int = Intrinsic::aarch64_neon_usqadd;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
}
- case AArch64::BI__builtin_neon_vuqadd_v:
- case AArch64::BI__builtin_neon_vuqaddq_v: {
+ case NEON::BI__builtin_neon_vuqadd_v:
+ case NEON::BI__builtin_neon_vuqaddq_v: {
Int = Intrinsic::aarch64_neon_suqadd;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
}
- case AArch64::BI__builtin_neon_vcls_v:
- case AArch64::BI__builtin_neon_vclsq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcls_v, E);
- case AArch64::BI__builtin_neon_vclz_v:
- case AArch64::BI__builtin_neon_vclzq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vclz_v, E);
- case AArch64::BI__builtin_neon_vcnt_v:
- case AArch64::BI__builtin_neon_vcntq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcnt_v, E);
- case AArch64::BI__builtin_neon_vrbit_v:
- case AArch64::BI__builtin_neon_vrbitq_v:
+ case NEON::BI__builtin_neon_vrbit_v:
+ case NEON::BI__builtin_neon_vrbitq_v:
Int = Intrinsic::aarch64_neon_rbit;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
- case AArch64::BI__builtin_neon_vmovn_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovn_v, E);
- case AArch64::BI__builtin_neon_vqmovun_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqmovun_v, E);
- case AArch64::BI__builtin_neon_vqmovn_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqmovn_v, E);
- case AArch64::BI__builtin_neon_vcvt_f16_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f16_v, E);
- case AArch64::BI__builtin_neon_vcvt_f32_f16:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f32_f16, E);
- case AArch64::BI__builtin_neon_vcvt_f32_f64: {
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false));
+ case NEON::BI__builtin_neon_vcvt_f32_f64: {
+ NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
+ Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
}
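vcvt_f32_f64 above is a plain floating-point truncation from a 2 x f64 vector to 2 x f32; the bitcast just fixes up the source type before the fptrunc. For example, on AArch64:

    #include <arm_neon.h>

    float32x2_t narrow(float64x2_t v) {
      // Emitted as an fptrunc, per the CreateFPTrunc call above.
      return vcvt_f32_f64(v);
    }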
- case AArch64::BI__builtin_neon_vcvtx_f32_v: {
+ case NEON::BI__builtin_neon_vcvtx_f32_v: {
llvm::Type *EltTy = FloatTy;
llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2);
llvm::Type *Tys[2] = { ResTy, Ty };
- Int = Intrinsic::aarch64_neon_fcvtxn;
+ Int = Intrinsic::aarch64_neon_vcvtxn;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64");
}
- case AArch64::BI__builtin_neon_vcvt_f64_f32: {
+ case NEON::BI__builtin_neon_vcvt_f64_f32: {
llvm::Type *OpTy =
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false));
Ops[0] = Builder.CreateBitCast(Ops[0], OpTy);
return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
}
- case AArch64::BI__builtin_neon_vcvt_f64_v:
- case AArch64::BI__builtin_neon_vcvtq_f64_v: {
+ case NEON::BI__builtin_neon_vcvt_f64_v:
+ case NEON::BI__builtin_neon_vcvtq_f64_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
}
- case AArch64::BI__builtin_neon_vrndn_v:
- case AArch64::BI__builtin_neon_vrndnq_v: {
+ case NEON::BI__builtin_neon_vrndn_v:
+ case NEON::BI__builtin_neon_vrndnq_v: {
Int = Intrinsic::aarch64_neon_frintn;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
}
- case AArch64::BI__builtin_neon_vrnda_v:
- case AArch64::BI__builtin_neon_vrndaq_v: {
+ case NEON::BI__builtin_neon_vrnda_v:
+ case NEON::BI__builtin_neon_vrndaq_v: {
Int = Intrinsic::round;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
}
- case AArch64::BI__builtin_neon_vrndp_v:
- case AArch64::BI__builtin_neon_vrndpq_v: {
+ case NEON::BI__builtin_neon_vrndp_v:
+ case NEON::BI__builtin_neon_vrndpq_v: {
Int = Intrinsic::ceil;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
}
- case AArch64::BI__builtin_neon_vrndm_v:
- case AArch64::BI__builtin_neon_vrndmq_v: {
+ case NEON::BI__builtin_neon_vrndm_v:
+ case NEON::BI__builtin_neon_vrndmq_v: {
Int = Intrinsic::floor;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
}
- case AArch64::BI__builtin_neon_vrndx_v:
- case AArch64::BI__builtin_neon_vrndxq_v: {
+ case NEON::BI__builtin_neon_vrndx_v:
+ case NEON::BI__builtin_neon_vrndxq_v: {
Int = Intrinsic::rint;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
}
- case AArch64::BI__builtin_neon_vrnd_v:
- case AArch64::BI__builtin_neon_vrndq_v: {
+ case NEON::BI__builtin_neon_vrnd_v:
+ case NEON::BI__builtin_neon_vrndq_v: {
Int = Intrinsic::trunc;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd");
}
- case AArch64::BI__builtin_neon_vrndi_v:
- case AArch64::BI__builtin_neon_vrndiq_v: {
+ case NEON::BI__builtin_neon_vrndi_v:
+ case NEON::BI__builtin_neon_vrndiq_v: {
Int = Intrinsic::nearbyint;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
}
- case AArch64::BI__builtin_neon_vcvt_s32_v:
- case AArch64::BI__builtin_neon_vcvt_u32_v:
- case AArch64::BI__builtin_neon_vcvtq_s32_v:
- case AArch64::BI__builtin_neon_vcvtq_u32_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_u32_v, E);
- case AArch64::BI__builtin_neon_vcvt_s64_v:
- case AArch64::BI__builtin_neon_vcvt_u64_v:
- case AArch64::BI__builtin_neon_vcvtq_s64_v:
- case AArch64::BI__builtin_neon_vcvtq_u64_v: {
- llvm::Type *DoubleTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
- Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
- return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
- : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
- }
- case AArch64::BI__builtin_neon_vcvtn_s32_v:
- case AArch64::BI__builtin_neon_vcvtnq_s32_v: {
- llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtns;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f32");
- }
- case AArch64::BI__builtin_neon_vcvtn_s64_v:
- case AArch64::BI__builtin_neon_vcvtnq_s64_v: {
- llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtns;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f64");
- }
- case AArch64::BI__builtin_neon_vcvtn_u32_v:
- case AArch64::BI__builtin_neon_vcvtnq_u32_v: {
- llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtnu;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f32");
- }
- case AArch64::BI__builtin_neon_vcvtn_u64_v:
- case AArch64::BI__builtin_neon_vcvtnq_u64_v: {
- llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtnu;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f64");
- }
- case AArch64::BI__builtin_neon_vcvtp_s32_v:
- case AArch64::BI__builtin_neon_vcvtpq_s32_v: {
- llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtps;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f32");
- }
- case AArch64::BI__builtin_neon_vcvtp_s64_v:
- case AArch64::BI__builtin_neon_vcvtpq_s64_v: {
- llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtps;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f64");
- }
- case AArch64::BI__builtin_neon_vcvtp_u32_v:
- case AArch64::BI__builtin_neon_vcvtpq_u32_v: {
- llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtpu;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f32");
- }
- case AArch64::BI__builtin_neon_vcvtp_u64_v:
- case AArch64::BI__builtin_neon_vcvtpq_u64_v: {
- llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtpu;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f64");
- }
- case AArch64::BI__builtin_neon_vcvtm_s32_v:
- case AArch64::BI__builtin_neon_vcvtmq_s32_v: {
- llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtms;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f32");
- }
- case AArch64::BI__builtin_neon_vcvtm_s64_v:
- case AArch64::BI__builtin_neon_vcvtmq_s64_v: {
- llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtms;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f64");
- }
- case AArch64::BI__builtin_neon_vcvtm_u32_v:
- case AArch64::BI__builtin_neon_vcvtmq_u32_v: {
- llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtmu;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f32");
- }
- case AArch64::BI__builtin_neon_vcvtm_u64_v:
- case AArch64::BI__builtin_neon_vcvtmq_u64_v: {
- llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtmu;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f64");
- }
- case AArch64::BI__builtin_neon_vcvta_s32_v:
- case AArch64::BI__builtin_neon_vcvtaq_s32_v: {
- llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtas;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f32");
- }
- case AArch64::BI__builtin_neon_vcvta_s64_v:
- case AArch64::BI__builtin_neon_vcvtaq_s64_v: {
- llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtas;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f64");
- }
- case AArch64::BI__builtin_neon_vcvta_u32_v:
- case AArch64::BI__builtin_neon_vcvtaq_u32_v: {
- llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtau;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f32");
- }
- case AArch64::BI__builtin_neon_vcvta_u64_v:
- case AArch64::BI__builtin_neon_vcvtaq_u64_v: {
- llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
- llvm::Type *Tys[2] = { Ty, OpTy };
- Int = Intrinsic::aarch64_neon_fcvtau;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f64");
- }
- case AArch64::BI__builtin_neon_vrecpe_v:
- case AArch64::BI__builtin_neon_vrecpeq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpe_v, E);
- case AArch64::BI__builtin_neon_vrsqrte_v:
- case AArch64::BI__builtin_neon_vrsqrteq_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrte_v, E);
- case AArch64::BI__builtin_neon_vsqrt_v:
- case AArch64::BI__builtin_neon_vsqrtq_v: {
+ case NEON::BI__builtin_neon_vsqrt_v:
+ case NEON::BI__builtin_neon_vsqrtq_v: {
Int = Intrinsic::sqrt;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
}
- case AArch64::BI__builtin_neon_vcvt_f32_v:
- case AArch64::BI__builtin_neon_vcvtq_f32_v:
- return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f32_v, E);
- case AArch64::BI__builtin_neon_vceqz_v:
- case AArch64::BI__builtin_neon_vceqzq_v:
+ case NEON::BI__builtin_neon_vceqz_v:
+ case NEON::BI__builtin_neon_vceqzq_v:
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
ICmpInst::ICMP_EQ, "vceqz");
- case AArch64::BI__builtin_neon_vcgez_v:
- case AArch64::BI__builtin_neon_vcgezq_v:
+ case NEON::BI__builtin_neon_vcgez_v:
+ case NEON::BI__builtin_neon_vcgezq_v:
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
ICmpInst::ICMP_SGE, "vcgez");
- case AArch64::BI__builtin_neon_vclez_v:
- case AArch64::BI__builtin_neon_vclezq_v:
+ case NEON::BI__builtin_neon_vclez_v:
+ case NEON::BI__builtin_neon_vclezq_v:
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
ICmpInst::ICMP_SLE, "vclez");
- case AArch64::BI__builtin_neon_vcgtz_v:
- case AArch64::BI__builtin_neon_vcgtzq_v:
+ case NEON::BI__builtin_neon_vcgtz_v:
+ case NEON::BI__builtin_neon_vcgtzq_v:
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
ICmpInst::ICMP_SGT, "vcgtz");
- case AArch64::BI__builtin_neon_vcltz_v:
- case AArch64::BI__builtin_neon_vcltzq_v:
+ case NEON::BI__builtin_neon_vcltz_v:
+ case NEON::BI__builtin_neon_vcltzq_v:
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
ICmpInst::ICMP_SLT, "vcltz");
}
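The five compare-with-zero builtins above share one helper. EmitAArch64CompareBuiltinExpr is defined elsewhere in this file, but presumably emits the floating-point or integer comparison passed in and widens the i1 result to an all-ones/all-zeros lane mask. A usage sketch, assuming an AArch64 target:

    #include <arm_neon.h>

    uint32x4_t nonpositive_mask(int32x4_t v) {
      // vclezq_s32: each lane is ~0u where v <= 0, else 0; taken through the
      // ICMP_SLE path of EmitAArch64CompareBuiltinExpr above.
      return vclezq_s32(v);
    }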
@@ -4074,28 +4475,28 @@
for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
if (i == 0) {
switch (BuiltinID) {
- case ARM::BI__builtin_neon_vld1_v:
- case ARM::BI__builtin_neon_vld1q_v:
- case ARM::BI__builtin_neon_vld1q_lane_v:
- case ARM::BI__builtin_neon_vld1_lane_v:
- case ARM::BI__builtin_neon_vld1_dup_v:
- case ARM::BI__builtin_neon_vld1q_dup_v:
- case ARM::BI__builtin_neon_vst1_v:
- case ARM::BI__builtin_neon_vst1q_v:
- case ARM::BI__builtin_neon_vst1q_lane_v:
- case ARM::BI__builtin_neon_vst1_lane_v:
- case ARM::BI__builtin_neon_vst2_v:
- case ARM::BI__builtin_neon_vst2q_v:
- case ARM::BI__builtin_neon_vst2_lane_v:
- case ARM::BI__builtin_neon_vst2q_lane_v:
- case ARM::BI__builtin_neon_vst3_v:
- case ARM::BI__builtin_neon_vst3q_v:
- case ARM::BI__builtin_neon_vst3_lane_v:
- case ARM::BI__builtin_neon_vst3q_lane_v:
- case ARM::BI__builtin_neon_vst4_v:
- case ARM::BI__builtin_neon_vst4q_v:
- case ARM::BI__builtin_neon_vst4_lane_v:
- case ARM::BI__builtin_neon_vst4q_lane_v:
+ case NEON::BI__builtin_neon_vld1_v:
+ case NEON::BI__builtin_neon_vld1q_v:
+ case NEON::BI__builtin_neon_vld1q_lane_v:
+ case NEON::BI__builtin_neon_vld1_lane_v:
+ case NEON::BI__builtin_neon_vld1_dup_v:
+ case NEON::BI__builtin_neon_vld1q_dup_v:
+ case NEON::BI__builtin_neon_vst1_v:
+ case NEON::BI__builtin_neon_vst1q_v:
+ case NEON::BI__builtin_neon_vst1q_lane_v:
+ case NEON::BI__builtin_neon_vst1_lane_v:
+ case NEON::BI__builtin_neon_vst2_v:
+ case NEON::BI__builtin_neon_vst2q_v:
+ case NEON::BI__builtin_neon_vst2_lane_v:
+ case NEON::BI__builtin_neon_vst2q_lane_v:
+ case NEON::BI__builtin_neon_vst3_v:
+ case NEON::BI__builtin_neon_vst3q_v:
+ case NEON::BI__builtin_neon_vst3_lane_v:
+ case NEON::BI__builtin_neon_vst3q_lane_v:
+ case NEON::BI__builtin_neon_vst4_v:
+ case NEON::BI__builtin_neon_vst4q_v:
+ case NEON::BI__builtin_neon_vst4_lane_v:
+ case NEON::BI__builtin_neon_vst4q_lane_v:
// Get the alignment for the argument in addition to the value;
// we'll use it later.
std::pair<llvm::Value*, unsigned> Src =
@@ -4107,21 +4508,21 @@
}
if (i == 1) {
switch (BuiltinID) {
- case ARM::BI__builtin_neon_vld2_v:
- case ARM::BI__builtin_neon_vld2q_v:
- case ARM::BI__builtin_neon_vld3_v:
- case ARM::BI__builtin_neon_vld3q_v:
- case ARM::BI__builtin_neon_vld4_v:
- case ARM::BI__builtin_neon_vld4q_v:
- case ARM::BI__builtin_neon_vld2_lane_v:
- case ARM::BI__builtin_neon_vld2q_lane_v:
- case ARM::BI__builtin_neon_vld3_lane_v:
- case ARM::BI__builtin_neon_vld3q_lane_v:
- case ARM::BI__builtin_neon_vld4_lane_v:
- case ARM::BI__builtin_neon_vld4q_lane_v:
- case ARM::BI__builtin_neon_vld2_dup_v:
- case ARM::BI__builtin_neon_vld3_dup_v:
- case ARM::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld2_v:
+ case NEON::BI__builtin_neon_vld2q_v:
+ case NEON::BI__builtin_neon_vld3_v:
+ case NEON::BI__builtin_neon_vld3q_v:
+ case NEON::BI__builtin_neon_vld4_v:
+ case NEON::BI__builtin_neon_vld4q_v:
+ case NEON::BI__builtin_neon_vld2_lane_v:
+ case NEON::BI__builtin_neon_vld2q_lane_v:
+ case NEON::BI__builtin_neon_vld3_lane_v:
+ case NEON::BI__builtin_neon_vld3q_lane_v:
+ case NEON::BI__builtin_neon_vld4_lane_v:
+ case NEON::BI__builtin_neon_vld4q_lane_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
// Get the alignment for the argument in addition to the value;
// we'll use it later.
std::pair<llvm::Value*, unsigned> Src =
@@ -4134,34 +4535,52 @@
Ops.push_back(EmitScalarExpr(E->getArg(i)));
}
- // vget_lane and vset_lane are not overloaded and do not have an extra
- // argument that specifies the vector type.
switch (BuiltinID) {
default: break;
- case ARM::BI__builtin_neon_vget_lane_i8:
- case ARM::BI__builtin_neon_vget_lane_i16:
- case ARM::BI__builtin_neon_vget_lane_i32:
- case ARM::BI__builtin_neon_vget_lane_i64:
- case ARM::BI__builtin_neon_vget_lane_f32:
- case ARM::BI__builtin_neon_vgetq_lane_i8:
- case ARM::BI__builtin_neon_vgetq_lane_i16:
- case ARM::BI__builtin_neon_vgetq_lane_i32:
- case ARM::BI__builtin_neon_vgetq_lane_i64:
- case ARM::BI__builtin_neon_vgetq_lane_f32:
+ // vget_lane and vset_lane are not overloaded and do not have an extra
+ // argument that specifies the vector type.
+ case NEON::BI__builtin_neon_vget_lane_i8:
+ case NEON::BI__builtin_neon_vget_lane_i16:
+ case NEON::BI__builtin_neon_vget_lane_i32:
+ case NEON::BI__builtin_neon_vget_lane_i64:
+ case NEON::BI__builtin_neon_vget_lane_f32:
+ case NEON::BI__builtin_neon_vgetq_lane_i8:
+ case NEON::BI__builtin_neon_vgetq_lane_i16:
+ case NEON::BI__builtin_neon_vgetq_lane_i32:
+ case NEON::BI__builtin_neon_vgetq_lane_i64:
+ case NEON::BI__builtin_neon_vgetq_lane_f32:
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vget_lane");
- case ARM::BI__builtin_neon_vset_lane_i8:
- case ARM::BI__builtin_neon_vset_lane_i16:
- case ARM::BI__builtin_neon_vset_lane_i32:
- case ARM::BI__builtin_neon_vset_lane_i64:
- case ARM::BI__builtin_neon_vset_lane_f32:
- case ARM::BI__builtin_neon_vsetq_lane_i8:
- case ARM::BI__builtin_neon_vsetq_lane_i16:
- case ARM::BI__builtin_neon_vsetq_lane_i32:
- case ARM::BI__builtin_neon_vsetq_lane_i64:
- case ARM::BI__builtin_neon_vsetq_lane_f32:
+ case NEON::BI__builtin_neon_vset_lane_i8:
+ case NEON::BI__builtin_neon_vset_lane_i16:
+ case NEON::BI__builtin_neon_vset_lane_i32:
+ case NEON::BI__builtin_neon_vset_lane_i64:
+ case NEON::BI__builtin_neon_vset_lane_f32:
+ case NEON::BI__builtin_neon_vsetq_lane_i8:
+ case NEON::BI__builtin_neon_vsetq_lane_i16:
+ case NEON::BI__builtin_neon_vsetq_lane_i32:
+ case NEON::BI__builtin_neon_vsetq_lane_i64:
+ case NEON::BI__builtin_neon_vsetq_lane_f32:
Ops.push_back(EmitScalarExpr(E->getArg(2)));
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
+
+ // Non-polymorphic crypto instructions are also not overloaded
+ case NEON::BI__builtin_neon_vsha1h_u32:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
+ "vsha1h");
+ case NEON::BI__builtin_neon_vsha1cq_u32:
+ Ops.push_back(EmitScalarExpr(E->getArg(2)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
+ "vsha1c");
+ case NEON::BI__builtin_neon_vsha1pq_u32:
+ Ops.push_back(EmitScalarExpr(E->getArg(2)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
+ "vsha1p");
+ case NEON::BI__builtin_neon_vsha1mq_u32:
+ Ops.push_back(EmitScalarExpr(E->getArg(2)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
+ "vsha1m");
}
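The four SHA-1 builtins added above are not overloaded, so they bypass the usual type-flag argument and map directly to the arm_neon_sha1h/sha1c/sha1p/sha1m intrinsics. A usage sketch, assuming a target with the crypto extension:

    #include <arm_neon.h>

    uint32x4_t sha1_round(uint32x4_t abcd, uint32_t e, uint32x4_t wk) {
      uint32_t rot_e = vsha1h_u32(e);        // fixed-rotate helper
      return vsha1cq_u32(abcd, rot_e, wk);   // choose-round update
    }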
// Get the last argument, which specifies the vector type.
@@ -4191,7 +4610,6 @@
// Determine the type of this overloaded NEON intrinsic.
NeonTypeFlags Type(Result.getZExtValue());
bool usgn = Type.isUnsigned();
- bool quad = Type.isQuad();
bool rightShift = false;
llvm::VectorType *VTy = GetNeonType(this, Type);
@@ -4199,158 +4617,20 @@
if (!Ty)
return 0;
+ // Many NEON builtins have identical semantics and uses in ARM and
+ // AArch64. Emit these in a single function.
+ llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap);
+ const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+ IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
+ if (Builtin)
+ return EmitCommonNeonBuiltinExpr(
+ Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
+ Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align);
+
unsigned Int;
switch (BuiltinID) {
default: return 0;
- case ARM::BI__builtin_neon_vbsl_v:
- case ARM::BI__builtin_neon_vbslq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty),
- Ops, "vbsl");
- case ARM::BI__builtin_neon_vabd_v:
- case ARM::BI__builtin_neon_vabdq_v:
- Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
- case ARM::BI__builtin_neon_vabs_v:
- case ARM::BI__builtin_neon_vabsq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
- Ops, "vabs");
- case ARM::BI__builtin_neon_vaddhn_v: {
- llvm::VectorType *SrcTy =
- llvm::VectorType::getExtendedElementVectorType(VTy);
-
- // %sum = add <4 x i32> %lhs, %rhs
- Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
- Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
-
- // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
- Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
- SrcTy->getScalarSizeInBits() / 2);
- ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
- Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
-
- // %res = trunc <4 x i32> %high to <4 x i16>
- return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
- }
- case ARM::BI__builtin_neon_vcale_v:
- std::swap(Ops[0], Ops[1]);
- case ARM::BI__builtin_neon_vcage_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
- return EmitNeonCall(F, Ops, "vcage");
- }
- case ARM::BI__builtin_neon_vcaleq_v:
- std::swap(Ops[0], Ops[1]);
- case ARM::BI__builtin_neon_vcageq_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
- return EmitNeonCall(F, Ops, "vcage");
- }
- case ARM::BI__builtin_neon_vcalt_v:
- std::swap(Ops[0], Ops[1]);
- case ARM::BI__builtin_neon_vcagt_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
- return EmitNeonCall(F, Ops, "vcagt");
- }
- case ARM::BI__builtin_neon_vcaltq_v:
- std::swap(Ops[0], Ops[1]);
- case ARM::BI__builtin_neon_vcagtq_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
- return EmitNeonCall(F, Ops, "vcagt");
- }
- case ARM::BI__builtin_neon_vcls_v:
- case ARM::BI__builtin_neon_vclsq_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
- return EmitNeonCall(F, Ops, "vcls");
- }
- case ARM::BI__builtin_neon_vclz_v:
- case ARM::BI__builtin_neon_vclzq_v: {
- // Generate target-independent intrinsic; also need to add second argument
- // for whether or not clz of zero is undefined; on ARM it isn't.
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
- Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
- return EmitNeonCall(F, Ops, "vclz");
- }
- case ARM::BI__builtin_neon_vcnt_v:
- case ARM::BI__builtin_neon_vcntq_v: {
- // generate target-independent intrinsic
- Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
- return EmitNeonCall(F, Ops, "vctpop");
- }
- case ARM::BI__builtin_neon_vcvt_f16_v: {
- assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
- "unexpected vcvt_f16_v builtin");
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
- return EmitNeonCall(F, Ops, "vcvt");
- }
- case ARM::BI__builtin_neon_vcvt_f32_f16: {
- assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
- "unexpected vcvt_f32_f16 builtin");
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
- return EmitNeonCall(F, Ops, "vcvt");
- }
- case ARM::BI__builtin_neon_vcvt_f32_v:
- case ARM::BI__builtin_neon_vcvtq_f32_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
- return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
- : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
- case ARM::BI__builtin_neon_vcvt_s32_v:
- case ARM::BI__builtin_neon_vcvt_u32_v:
- case ARM::BI__builtin_neon_vcvtq_s32_v:
- case ARM::BI__builtin_neon_vcvtq_u32_v: {
- llvm::Type *FloatTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
- Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
- return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
- : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
- }
- case ARM::BI__builtin_neon_vcvt_n_f32_v:
- case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
- llvm::Type *FloatTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
- llvm::Type *Tys[2] = { FloatTy, Ty };
- Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
- : Intrinsic::arm_neon_vcvtfxs2fp;
- Function *F = CGM.getIntrinsic(Int, Tys);
- return EmitNeonCall(F, Ops, "vcvt_n");
- }
- case ARM::BI__builtin_neon_vcvt_n_s32_v:
- case ARM::BI__builtin_neon_vcvt_n_u32_v:
- case ARM::BI__builtin_neon_vcvtq_n_s32_v:
- case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
- llvm::Type *FloatTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
- llvm::Type *Tys[2] = { Ty, FloatTy };
- Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
- : Intrinsic::arm_neon_vcvtfp2fxs;
- Function *F = CGM.getIntrinsic(Int, Tys);
- return EmitNeonCall(F, Ops, "vcvt_n");
- }
- case ARM::BI__builtin_neon_vext_v:
- case ARM::BI__builtin_neon_vextq_v: {
- int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
- SmallVector<Constant*, 16> Indices;
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
-
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Value *SV = llvm::ConstantVector::get(Indices);
- return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
- }
- case ARM::BI__builtin_neon_vhadd_v:
- case ARM::BI__builtin_neon_vhaddq_v:
- Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
- case ARM::BI__builtin_neon_vhsub_v:
- case ARM::BI__builtin_neon_vhsubq_v:
- Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
- case ARM::BI__builtin_neon_vld1_v:
- case ARM::BI__builtin_neon_vld1q_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
- Ops, "vld1");
- case ARM::BI__builtin_neon_vld1q_lane_v:
+ case NEON::BI__builtin_neon_vld1q_lane_v:
// Handle 64-bit integer elements as a special case. Use shuffles of
// one-element vectors to avoid poor code for i64 in the backend.
if (VTy->getElementType()->isIntegerTy(64)) {
@@ -4371,7 +4651,7 @@
return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
}
// fall through
- case ARM::BI__builtin_neon_vld1_lane_v: {
+ case NEON::BI__builtin_neon_vld1_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ty = llvm::PointerType::getUnqual(VTy->getElementType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
@@ -4379,90 +4659,19 @@
Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
}
- case ARM::BI__builtin_neon_vld1_dup_v:
- case ARM::BI__builtin_neon_vld1q_dup_v: {
- Value *V = UndefValue::get(Ty);
- Ty = llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- LoadInst *Ld = Builder.CreateLoad(Ops[0]);
- Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
- llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
- Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
- return EmitNeonSplat(Ops[0], CI);
- }
- case ARM::BI__builtin_neon_vld2_v:
- case ARM::BI__builtin_neon_vld2q_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
- Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
- case ARM::BI__builtin_neon_vld3_v:
- case ARM::BI__builtin_neon_vld3q_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
- Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
- case ARM::BI__builtin_neon_vld4_v:
- case ARM::BI__builtin_neon_vld4q_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
- Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
- case ARM::BI__builtin_neon_vld2_lane_v:
- case ARM::BI__builtin_neon_vld2q_lane_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
- Ops.push_back(Align);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
- case ARM::BI__builtin_neon_vld3_lane_v:
- case ARM::BI__builtin_neon_vld3q_lane_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
- Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
- Ops.push_back(Align);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
- case ARM::BI__builtin_neon_vld4_lane_v:
- case ARM::BI__builtin_neon_vld4q_lane_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
- Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
- Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
- Ops.push_back(Align);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
- case ARM::BI__builtin_neon_vld2_dup_v:
- case ARM::BI__builtin_neon_vld3_dup_v:
- case ARM::BI__builtin_neon_vld4_dup_v: {
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v: {
// Handle 64-bit elements as a special-case. There is no "dup" needed.
if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
switch (BuiltinID) {
- case ARM::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
Int = Intrinsic::arm_neon_vld2;
break;
- case ARM::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
Int = Intrinsic::arm_neon_vld3;
break;
- case ARM::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
Int = Intrinsic::arm_neon_vld4;
break;
default: llvm_unreachable("unknown vld_dup intrinsic?");
@@ -4474,13 +4683,13 @@
return Builder.CreateStore(Ops[1], Ops[0]);
}
switch (BuiltinID) {
- case ARM::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
Int = Intrinsic::arm_neon_vld2lane;
break;
- case ARM::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
Int = Intrinsic::arm_neon_vld3lane;
break;
- case ARM::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
Int = Intrinsic::arm_neon_vld4lane;
break;
default: llvm_unreachable("unknown vld_dup intrinsic?");
@@ -4509,251 +4718,58 @@
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateStore(Ops[1], Ops[0]);
}
- case ARM::BI__builtin_neon_vmax_v:
- case ARM::BI__builtin_neon_vmaxq_v:
- Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
- case ARM::BI__builtin_neon_vmin_v:
- case ARM::BI__builtin_neon_vminq_v:
- Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
- case ARM::BI__builtin_neon_vmovl_v: {
- llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
- Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
- if (usgn)
- return Builder.CreateZExt(Ops[0], Ty, "vmovl");
- return Builder.CreateSExt(Ops[0], Ty, "vmovl");
- }
- case ARM::BI__builtin_neon_vmovn_v: {
- llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
- Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
- return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
- }
- case ARM::BI__builtin_neon_vmul_v:
- case ARM::BI__builtin_neon_vmulq_v:
- assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
- Ops, "vmul");
- case ARM::BI__builtin_neon_vmull_v:
- // FIXME: the integer vmull operations could be emitted in terms of pure
- // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
- // hoisting the exts outside loops. Until global ISel comes along that can
- // see through such movement this leads to bad CodeGen. So we need an
- // intrinsic for now.
- Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
- Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
- case ARM::BI__builtin_neon_vfma_v:
- case ARM::BI__builtin_neon_vfmaq_v: {
- Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-
- // NEON intrinsic puts accumulator first, unlike the LLVM fma.
- return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
- }
- case ARM::BI__builtin_neon_vpadal_v:
- case ARM::BI__builtin_neon_vpadalq_v: {
- Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
- // The source operand type has twice as many elements of half the size.
- unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- llvm::Type *EltTy =
- llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
- llvm::Type *NarrowTy =
- llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
- llvm::Type *Tys[2] = { Ty, NarrowTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
- }
- case ARM::BI__builtin_neon_vpadd_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
- Ops, "vpadd");
- case ARM::BI__builtin_neon_vpaddl_v:
- case ARM::BI__builtin_neon_vpaddlq_v: {
- Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
- // The source operand type has twice as many elements of half the size.
- unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
- llvm::Type *NarrowTy =
- llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
- llvm::Type *Tys[2] = { Ty, NarrowTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
- }
- case ARM::BI__builtin_neon_vpmax_v:
- Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
- case ARM::BI__builtin_neon_vpmin_v:
- Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
- case ARM::BI__builtin_neon_vqabs_v:
- case ARM::BI__builtin_neon_vqabsq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
- Ops, "vqabs");
- case ARM::BI__builtin_neon_vqadd_v:
- case ARM::BI__builtin_neon_vqaddq_v:
- Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
- case ARM::BI__builtin_neon_vqdmlal_v: {
- SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
- Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
- MulOps, "vqdmlal");
-
- SmallVector<Value *, 2> AddOps;
- AddOps.push_back(Ops[0]);
- AddOps.push_back(Mul);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqadds, Ty),
- AddOps, "vqdmlal");
- }
- case ARM::BI__builtin_neon_vqdmlsl_v: {
- SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
- Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
- MulOps, "vqdmlsl");
-
- SmallVector<Value *, 2> SubOps;
- SubOps.push_back(Ops[0]);
- SubOps.push_back(Mul);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqsubs, Ty),
- SubOps, "vqdmlsl");
- }
- case ARM::BI__builtin_neon_vqdmulh_v:
- case ARM::BI__builtin_neon_vqdmulhq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
- Ops, "vqdmulh");
- case ARM::BI__builtin_neon_vqdmull_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
- Ops, "vqdmull");
- case ARM::BI__builtin_neon_vqmovn_v:
- Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
- case ARM::BI__builtin_neon_vqmovun_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
- Ops, "vqdmull");
- case ARM::BI__builtin_neon_vqneg_v:
- case ARM::BI__builtin_neon_vqnegq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
- Ops, "vqneg");
- case ARM::BI__builtin_neon_vqrdmulh_v:
- case ARM::BI__builtin_neon_vqrdmulhq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
- Ops, "vqrdmulh");
- case ARM::BI__builtin_neon_vqrshl_v:
- case ARM::BI__builtin_neon_vqrshlq_v:
- Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
- case ARM::BI__builtin_neon_vqrshrn_n_v:
+ case NEON::BI__builtin_neon_vqrshrn_n_v:
Int =
usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
1, true);
- case ARM::BI__builtin_neon_vqrshrun_n_v:
+ case NEON::BI__builtin_neon_vqrshrun_n_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
Ops, "vqrshrun_n", 1, true);
- case ARM::BI__builtin_neon_vqshl_v:
- case ARM::BI__builtin_neon_vqshlq_v:
- Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
- case ARM::BI__builtin_neon_vqshl_n_v:
- case ARM::BI__builtin_neon_vqshlq_n_v:
- Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
- 1, false);
- case ARM::BI__builtin_neon_vqshlu_n_v:
- case ARM::BI__builtin_neon_vqshluq_n_v:
+ case NEON::BI__builtin_neon_vqshlu_n_v:
+ case NEON::BI__builtin_neon_vqshluq_n_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
Ops, "vqshlu", 1, false);
- case ARM::BI__builtin_neon_vqshrn_n_v:
+ case NEON::BI__builtin_neon_vqshrn_n_v:
Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
1, true);
- case ARM::BI__builtin_neon_vqshrun_n_v:
+ case NEON::BI__builtin_neon_vqshrun_n_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
Ops, "vqshrun_n", 1, true);
- case ARM::BI__builtin_neon_vqsub_v:
- case ARM::BI__builtin_neon_vqsubq_v:
- Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
- case ARM::BI__builtin_neon_vraddhn_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
- Ops, "vraddhn");
- case ARM::BI__builtin_neon_vrecpe_v:
- case ARM::BI__builtin_neon_vrecpeq_v:
+ case NEON::BI__builtin_neon_vrecpe_v:
+ case NEON::BI__builtin_neon_vrecpeq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
Ops, "vrecpe");
- case ARM::BI__builtin_neon_vrecps_v:
- case ARM::BI__builtin_neon_vrecpsq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
- Ops, "vrecps");
- case ARM::BI__builtin_neon_vrhadd_v:
- case ARM::BI__builtin_neon_vrhaddq_v:
- Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
- case ARM::BI__builtin_neon_vrshl_v:
- case ARM::BI__builtin_neon_vrshlq_v:
- Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
- case ARM::BI__builtin_neon_vrshrn_n_v:
+ case NEON::BI__builtin_neon_vrshrn_n_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
Ops, "vrshrn_n", 1, true);
- case ARM::BI__builtin_neon_vrshr_n_v:
- case ARM::BI__builtin_neon_vrshrq_n_v:
+ case NEON::BI__builtin_neon_vrshr_n_v:
+ case NEON::BI__builtin_neon_vrshrq_n_v:
Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
- case ARM::BI__builtin_neon_vrsqrte_v:
- case ARM::BI__builtin_neon_vrsqrteq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
- Ops, "vrsqrte");
- case ARM::BI__builtin_neon_vrsqrts_v:
- case ARM::BI__builtin_neon_vrsqrtsq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
- Ops, "vrsqrts");
- case ARM::BI__builtin_neon_vrsra_n_v:
- case ARM::BI__builtin_neon_vrsraq_n_v:
+ case NEON::BI__builtin_neon_vrsra_n_v:
+ case NEON::BI__builtin_neon_vrsraq_n_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
- case ARM::BI__builtin_neon_vrsubhn_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
- Ops, "vrsubhn");
- case ARM::BI__builtin_neon_vshl_v:
- case ARM::BI__builtin_neon_vshlq_v:
- Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
- case ARM::BI__builtin_neon_vshll_n_v:
- Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
- case ARM::BI__builtin_neon_vshl_n_v:
- case ARM::BI__builtin_neon_vshlq_n_v:
- Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
- return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
- "vshl_n");
- case ARM::BI__builtin_neon_vshrn_n_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
- Ops, "vshrn_n", 1, true);
- case ARM::BI__builtin_neon_vshr_n_v:
- case ARM::BI__builtin_neon_vshrq_n_v:
- return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, usgn, "vshr_n");
- case ARM::BI__builtin_neon_vsri_n_v:
- case ARM::BI__builtin_neon_vsriq_n_v:
+ case NEON::BI__builtin_neon_vsri_n_v:
+ case NEON::BI__builtin_neon_vsriq_n_v:
rightShift = true;
- case ARM::BI__builtin_neon_vsli_n_v:
- case ARM::BI__builtin_neon_vsliq_n_v:
+ case NEON::BI__builtin_neon_vsli_n_v:
+ case NEON::BI__builtin_neon_vsliq_n_v:
Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
Ops, "vsli_n");
- case ARM::BI__builtin_neon_vsra_n_v:
- case ARM::BI__builtin_neon_vsraq_n_v:
+ case NEON::BI__builtin_neon_vsra_n_v:
+ case NEON::BI__builtin_neon_vsraq_n_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
return Builder.CreateAdd(Ops[0], Ops[1]);
- case ARM::BI__builtin_neon_vst1_v:
- case ARM::BI__builtin_neon_vst1q_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
- Ops, "");
- case ARM::BI__builtin_neon_vst1q_lane_v:
+ case NEON::BI__builtin_neon_vst1q_lane_v:
// Handle 64-bit integer elements as a special case. Use a shuffle to get
// a one-element vector and avoid poor code for i64 in the backend.
if (VTy->getElementType()->isIntegerTy(64)) {
@@ -4765,7 +4781,7 @@
Ops[1]->getType()), Ops);
}
// fall through
- case ARM::BI__builtin_neon_vst1_lane_v: {
+ case NEON::BI__builtin_neon_vst1_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
@@ -4774,89 +4790,1922 @@
St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
return St;
}
- case ARM::BI__builtin_neon_vst2_v:
- case ARM::BI__builtin_neon_vst2q_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
- Ops, "");
- case ARM::BI__builtin_neon_vst2_lane_v:
- case ARM::BI__builtin_neon_vst2q_lane_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
- Ops, "");
- case ARM::BI__builtin_neon_vst3_v:
- case ARM::BI__builtin_neon_vst3q_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
- Ops, "");
- case ARM::BI__builtin_neon_vst3_lane_v:
- case ARM::BI__builtin_neon_vst3q_lane_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
- Ops, "");
- case ARM::BI__builtin_neon_vst4_v:
- case ARM::BI__builtin_neon_vst4q_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
- Ops, "");
- case ARM::BI__builtin_neon_vst4_lane_v:
- case ARM::BI__builtin_neon_vst4q_lane_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
- Ops, "");
- case ARM::BI__builtin_neon_vsubhn_v: {
- llvm::VectorType *SrcTy =
- llvm::VectorType::getExtendedElementVectorType(VTy);
-
- // %sum = add <4 x i32> %lhs, %rhs
- Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
- Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
-
- // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
- Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
- SrcTy->getScalarSizeInBits() / 2);
- ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
- Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
-
- // %res = trunc <4 x i32> %high to <4 x i16>
- return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
- }
- case ARM::BI__builtin_neon_vtbl1_v:
+ case NEON::BI__builtin_neon_vtbl1_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
Ops, "vtbl1");
- case ARM::BI__builtin_neon_vtbl2_v:
+ case NEON::BI__builtin_neon_vtbl2_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
Ops, "vtbl2");
- case ARM::BI__builtin_neon_vtbl3_v:
+ case NEON::BI__builtin_neon_vtbl3_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
Ops, "vtbl3");
- case ARM::BI__builtin_neon_vtbl4_v:
+ case NEON::BI__builtin_neon_vtbl4_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
Ops, "vtbl4");
- case ARM::BI__builtin_neon_vtbx1_v:
+ case NEON::BI__builtin_neon_vtbx1_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
Ops, "vtbx1");
- case ARM::BI__builtin_neon_vtbx2_v:
+ case NEON::BI__builtin_neon_vtbx2_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
Ops, "vtbx2");
- case ARM::BI__builtin_neon_vtbx3_v:
+ case NEON::BI__builtin_neon_vtbx3_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
Ops, "vtbx3");
- case ARM::BI__builtin_neon_vtbx4_v:
+ case NEON::BI__builtin_neon_vtbx4_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
Ops, "vtbx4");
- case ARM::BI__builtin_neon_vtst_v:
- case ARM::BI__builtin_neon_vtstq_v: {
+ }
+}
+
+static Value *EmitARM64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
+ const CallExpr *E,
+ SmallVectorImpl<Value *> &Ops) {
+ unsigned int Int = 0;
+ const char *s = NULL;
+
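+  // TblPos is the index of the first table operand: the vtbl forms take
+  // their table vectors first, while the vtbx forms take the fallback
+  // vector first.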
+ unsigned TblPos;
+ switch (BuiltinID) {
+ default:
+ return 0;
+ case NEON::BI__builtin_neon_vtbl1_v:
+ case NEON::BI__builtin_neon_vqtbl1_v:
+ case NEON::BI__builtin_neon_vqtbl1q_v:
+ case NEON::BI__builtin_neon_vtbl2_v:
+ case NEON::BI__builtin_neon_vqtbl2_v:
+ case NEON::BI__builtin_neon_vqtbl2q_v:
+ case NEON::BI__builtin_neon_vtbl3_v:
+ case NEON::BI__builtin_neon_vqtbl3_v:
+ case NEON::BI__builtin_neon_vqtbl3q_v:
+ case NEON::BI__builtin_neon_vtbl4_v:
+ case NEON::BI__builtin_neon_vqtbl4_v:
+ case NEON::BI__builtin_neon_vqtbl4q_v:
+ TblPos = 0;
+ break;
+ case NEON::BI__builtin_neon_vtbx1_v:
+ case NEON::BI__builtin_neon_vqtbx1_v:
+ case NEON::BI__builtin_neon_vqtbx1q_v:
+ case NEON::BI__builtin_neon_vtbx2_v:
+ case NEON::BI__builtin_neon_vqtbx2_v:
+ case NEON::BI__builtin_neon_vqtbx2q_v:
+ case NEON::BI__builtin_neon_vtbx3_v:
+ case NEON::BI__builtin_neon_vqtbx3_v:
+ case NEON::BI__builtin_neon_vqtbx3q_v:
+ case NEON::BI__builtin_neon_vtbx4_v:
+ case NEON::BI__builtin_neon_vqtbx4_v:
+ case NEON::BI__builtin_neon_vqtbx4q_v:
+ TblPos = 1;
+ break;
+ }
+
+ assert(E->getNumArgs() >= 3);
+
+ // Get the last argument, which specifies the vector type.
+ llvm::APSInt Result;
+ const Expr *Arg = E->getArg(E->getNumArgs() - 1);
+ if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
+ return 0;
+
+ // Determine the type of this overloaded NEON intrinsic.
+ NeonTypeFlags Type(Result.getZExtValue());
+ llvm::VectorType *VTy = GetNeonType(&CGF, Type);
+ llvm::Type *Ty = VTy;
+ if (!Ty)
+ return 0;
+
+ Arg = E->getArg(TblPos);
+ unsigned nElts = VTy->getNumElements();
+
+ CodeGen::CGBuilderTy &Builder = CGF.Builder;
+
+  // AArch64 scalar builtins are not overloaded; they do not have an extra
+  // argument that specifies the vector type, so we need to handle each case.
+ SmallVector<Value *, 2> TblOps;
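+  // The 64-bit vtbl/vtbx forms below collect their D-sized table operands
+  // in TblOps and let packTBLDVectorList combine them into the wider table
+  // the arm64 TBL/TBX intrinsics operate on.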
+ switch (BuiltinID) {
+ case NEON::BI__builtin_neon_vtbl1_v: {
+ TblOps.push_back(Ops[0]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[1], Ty,
+ Intrinsic::arm64_neon_tbl1, "vtbl1");
+ }
+ case NEON::BI__builtin_neon_vtbl2_v: {
+ TblOps.push_back(Ops[0]);
+ TblOps.push_back(Ops[1]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
+ Intrinsic::arm64_neon_tbl1, "vtbl1");
+ }
+ case NEON::BI__builtin_neon_vtbl3_v: {
+ TblOps.push_back(Ops[0]);
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[3], Ty,
+ Intrinsic::arm64_neon_tbl2, "vtbl2");
+ }
+ case NEON::BI__builtin_neon_vtbl4_v: {
+ TblOps.push_back(Ops[0]);
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
+ Intrinsic::arm64_neon_tbl2, "vtbl2");
+ }
+ case NEON::BI__builtin_neon_vtbx1_v: {
+ TblOps.push_back(Ops[1]);
+ Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
+ Intrinsic::arm64_neon_tbl1, "vtbl1");
+
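+    // vtbx1 must leave any lane whose index is out of range (>= 8) equal to
+    // the corresponding lane of the fallback vector Ops[0], so blend the
+    // tbl1 result with the fallback by hand, roughly:
+    //   %oob  = icmp uge %idx, 8     ; per lane
+    //   %mask = sext %oob
+    //   %res  = (%mask & %fallback) | (~%mask & %tbl)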
+ llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8);
+ Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight);
+ Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
+ CmpRes = Builder.CreateSExt(CmpRes, Ty);
+
+ Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
+ Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
+ return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
+ }
+ case NEON::BI__builtin_neon_vtbx2_v: {
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty,
+ Intrinsic::arm64_neon_tbx1, "vtbx1");
+ }
+ case NEON::BI__builtin_neon_vtbx3_v: {
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
+ Intrinsic::arm64_neon_tbl2, "vtbl2");
+
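+    // As with vtbx1 above, lanes indexed beyond the three-register table
+    // (indices >= 24) must come from the fallback vector, so blend the tbl2
+    // result with Ops[0] based on an unsigned compare against 24.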
+ llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24);
+ Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour);
+ Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
+ TwentyFourV);
+ CmpRes = Builder.CreateSExt(CmpRes, Ty);
+
+ Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
+ Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
+ return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
+ }
+ case NEON::BI__builtin_neon_vtbx4_v: {
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ TblOps.push_back(Ops[4]);
+ return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty,
+ Intrinsic::arm64_neon_tbx2, "vtbx2");
+ }
+ case NEON::BI__builtin_neon_vqtbl1_v:
+ case NEON::BI__builtin_neon_vqtbl1q_v:
+ Int = Intrinsic::arm64_neon_tbl1; s = "vtbl1"; break;
+ case NEON::BI__builtin_neon_vqtbl2_v:
+  case NEON::BI__builtin_neon_vqtbl2q_v:
+ Int = Intrinsic::arm64_neon_tbl2; s = "vtbl2"; break;
+ case NEON::BI__builtin_neon_vqtbl3_v:
+ case NEON::BI__builtin_neon_vqtbl3q_v:
+ Int = Intrinsic::arm64_neon_tbl3; s = "vtbl3"; break;
+ case NEON::BI__builtin_neon_vqtbl4_v:
+ case NEON::BI__builtin_neon_vqtbl4q_v:
+ Int = Intrinsic::arm64_neon_tbl4; s = "vtbl4"; break;
+ case NEON::BI__builtin_neon_vqtbx1_v:
+ case NEON::BI__builtin_neon_vqtbx1q_v:
+ Int = Intrinsic::arm64_neon_tbx1; s = "vtbx1"; break;
+ case NEON::BI__builtin_neon_vqtbx2_v:
+ case NEON::BI__builtin_neon_vqtbx2q_v:
+ Int = Intrinsic::arm64_neon_tbx2; s = "vtbx2"; break;
+ case NEON::BI__builtin_neon_vqtbx3_v:
+ case NEON::BI__builtin_neon_vqtbx3q_v:
+ Int = Intrinsic::arm64_neon_tbx3; s = "vtbx3"; break;
+ case NEON::BI__builtin_neon_vqtbx4_v:
+ case NEON::BI__builtin_neon_vqtbx4q_v:
+ Int = Intrinsic::arm64_neon_tbx4; s = "vtbx4"; break;
+  }
+
+ if (!Int)
+ return 0;
+
+ Function *F = CGF.CGM.getIntrinsic(Int, Ty);
+ return CGF.EmitNeonCall(F, Ops, s);
+}
+
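+// Place a scalar i16 into lane 0 of an undef v4i16 so that vector
+// intrinsics (and the scalar instructions the backend pattern-matches them
+// to) can be used on a type that is not legal as a scalar.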
+Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
+ llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
+ Op = Builder.CreateBitCast(Op, Int16Ty);
+ Value *V = UndefValue::get(VTy);
+ llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
+ Op = Builder.CreateInsertElement(V, Op, CI);
+ return Op;
+}
+
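+// Likewise for i8: place the scalar into lane 0 of an undef v8i8.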
+Value *CodeGenFunction::vectorWrapScalar8(Value *Op) {
+ llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8);
+ Op = Builder.CreateBitCast(Op, Int8Ty);
+ Value *V = UndefValue::get(VTy);
+ llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
+ Op = Builder.CreateInsertElement(V, Op, CI);
+ return Op;
+}
+
+Value *CodeGenFunction::
+emitVectorWrappedScalar8Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops,
+ const char *Name) {
+  // i8 is not a legal type for ARM64, so we can't just use
+  // a normal overloaded intrinsic call for these scalar types. Instead
+  // we'll build 64-bit vectors with lane zero being our input values and
+  // perform the operation on that. The back end can pattern match directly
+  // to the scalar instruction.
+ Ops[0] = vectorWrapScalar8(Ops[0]);
+ Ops[1] = vectorWrapScalar8(Ops[1]);
+ llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8);
+ Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name);
+ Constant *CI = ConstantInt::get(Int32Ty, 0);
+ return Builder.CreateExtractElement(V, CI, "lane0");
+}
+
+Value *CodeGenFunction::
+emitVectorWrappedScalar16Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops,
+ const char *Name) {
+  // i16 is not a legal type for ARM64, so we can't just use
+  // a normal overloaded intrinsic call for these scalar types. Instead
+  // we'll build 64-bit vectors with lane zero being our input values and
+  // perform the operation on that. The back end can pattern match directly
+  // to the scalar instruction.
+ Ops[0] = vectorWrapScalar16(Ops[0]);
+ Ops[1] = vectorWrapScalar16(Ops[1]);
+ llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
+ Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name);
+ Constant *CI = ConstantInt::get(Int32Ty, 0);
+ return Builder.CreateExtractElement(V, CI, "lane0");
+}
+
+Value *CodeGenFunction::EmitARM64BuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ if (BuiltinID == ARM64::BI__clear_cache) {
+ assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
+ const FunctionDecl *FD = E->getDirectCallee();
+ SmallVector<Value*, 2> Ops;
+ for (unsigned i = 0; i < 2; i++)
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
+ llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
+ StringRef Name = FD->getName();
+ return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
+ }
+
+ if (BuiltinID == ARM64::BI__builtin_arm_ldrex &&
+ getContext().getTypeSize(E->getType()) == 128) {
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_ldxp);
+
+ Value *LdPtr = EmitScalarExpr(E->getArg(0));
+ Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
+ "ldxp");
+
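+    // ldxp returns the loaded value as a pair of i64s; reassemble them into
+    // a single i128, with the second element forming the high 64 bits:
+    //   %res = (zext(%pair.1) << 64) | zext(%pair.0)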
+ Value *Val0 = Builder.CreateExtractValue(Val, 1);
+ Value *Val1 = Builder.CreateExtractValue(Val, 0);
+ llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
+ Val0 = Builder.CreateZExt(Val0, Int128Ty);
+ Val1 = Builder.CreateZExt(Val1, Int128Ty);
+
+ Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
+ Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
+ Val = Builder.CreateOr(Val, Val1);
+ return Builder.CreateBitCast(Val, ConvertType(E->getType()));
+ } else if (BuiltinID == ARM64::BI__builtin_arm_ldrex) {
+ Value *LoadAddr = EmitScalarExpr(E->getArg(0));
+
+ QualType Ty = E->getType();
+ llvm::Type *RealResTy = ConvertType(Ty);
+ llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
+ getContext().getTypeSize(Ty));
+ LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
+
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_ldxr, LoadAddr->getType());
+ Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
+
+ if (RealResTy->isPointerTy())
+ return Builder.CreateIntToPtr(Val, RealResTy);
+
+ Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
+ return Builder.CreateBitCast(Val, RealResTy);
+ }
+
+ if (BuiltinID == ARM64::BI__builtin_arm_strex &&
+ getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_stxp);
+ llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, NULL);
+
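+    // stxp takes the value as two separate i64 halves. Spill the 128-bit
+    // argument to a temporary, reload it as an { i64, i64 } pair, and pass
+    // the two halves along with the target pointer.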
+ Value *One = llvm::ConstantInt::get(Int32Ty, 1);
+ Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()),
+ One);
+ Value *Val = EmitScalarExpr(E->getArg(0));
+ Builder.CreateStore(Val, Tmp);
+
+ Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
+ Val = Builder.CreateLoad(LdPtr);
+
+ Value *Arg0 = Builder.CreateExtractValue(Val, 0);
+ Value *Arg1 = Builder.CreateExtractValue(Val, 1);
+ Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
+ Int8PtrTy);
+ return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp");
+ } else if (BuiltinID == ARM64::BI__builtin_arm_strex) {
+ Value *StoreVal = EmitScalarExpr(E->getArg(0));
+ Value *StoreAddr = EmitScalarExpr(E->getArg(1));
+
+ QualType Ty = E->getArg(0)->getType();
+ llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
+ getContext().getTypeSize(Ty));
+ StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
+
+ if (StoreVal->getType()->isPointerTy())
+ StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
+ else {
+ StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
+ StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
+ }
+
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_stxr, StoreAddr->getType());
+ return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr");
+ }
+
+ if (BuiltinID == ARM64::BI__builtin_arm_clrex) {
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_clrex);
+ return Builder.CreateCall(F);
+ }
+
+ // CRC32
+ Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
+ switch (BuiltinID) {
+ case ARM64::BI__builtin_arm_crc32b:
+ CRCIntrinsicID = Intrinsic::arm64_crc32b; break;
+ case ARM64::BI__builtin_arm_crc32cb:
+ CRCIntrinsicID = Intrinsic::arm64_crc32cb; break;
+ case ARM64::BI__builtin_arm_crc32h:
+ CRCIntrinsicID = Intrinsic::arm64_crc32h; break;
+ case ARM64::BI__builtin_arm_crc32ch:
+ CRCIntrinsicID = Intrinsic::arm64_crc32ch; break;
+ case ARM64::BI__builtin_arm_crc32w:
+ CRCIntrinsicID = Intrinsic::arm64_crc32w; break;
+ case ARM64::BI__builtin_arm_crc32cw:
+ CRCIntrinsicID = Intrinsic::arm64_crc32cw; break;
+ case ARM64::BI__builtin_arm_crc32d:
+ CRCIntrinsicID = Intrinsic::arm64_crc32x; break;
+ case ARM64::BI__builtin_arm_crc32cd:
+ CRCIntrinsicID = Intrinsic::arm64_crc32cx; break;
+ }
+
+ if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
+ Value *Arg0 = EmitScalarExpr(E->getArg(0));
+ Value *Arg1 = EmitScalarExpr(E->getArg(1));
+ Function *F = CGM.getIntrinsic(CRCIntrinsicID);
+
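+    // The data operand may be narrower than the intrinsic's second
+    // parameter (e.g. i8 for crc32b); widen it to the expected width.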
+ llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
+ Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
+
+ return Builder.CreateCall2(F, Arg0, Arg1);
+ }
+
+ llvm::SmallVector<Value*, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+
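+  // Scalar (SISD) builtins are table-driven: if the builtin appears in
+  // ARM64SISDIntrinsicMap, let the common SISD emitter handle it before
+  // falling back to the hand-written cases below.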
+ llvm::ArrayRef<NeonIntrinsicInfo> SISDMap(ARM64SISDIntrinsicMap);
+ const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+ SISDMap, BuiltinID, ARM64SISDIntrinsicsProvenSorted);
+
+ if (Builtin) {
+ Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
+ Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
+ assert(Result && "SISD intrinsic should have been handled");
+ return Result;
+ }
+
+ llvm::APSInt Result;
+ const Expr *Arg = E->getArg(E->getNumArgs()-1);
+ NeonTypeFlags Type(0);
+ if (Arg->isIntegerConstantExpr(Result, getContext()))
+ // Determine the type of this overloaded NEON intrinsic.
+ Type = NeonTypeFlags(Result.getZExtValue());
+
+ bool usgn = Type.isUnsigned();
+ bool quad = Type.isQuad();
+
+ // Handle non-overloaded intrinsics first.
+ switch (BuiltinID) {
+ default: break;
+ case NEON::BI__builtin_neon_vldrq_p128: {
+ llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
+ Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
+ return Builder.CreateLoad(Ptr);
+ }
+ case NEON::BI__builtin_neon_vstrq_p128: {
+ llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
+ Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
+ return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr);
+ }
+ case NEON::BI__builtin_neon_vcvts_u32_f32:
+ case NEON::BI__builtin_neon_vcvtd_u64_f64:
+ usgn = true;
+ // FALL THROUGH
+ case NEON::BI__builtin_neon_vcvts_s32_f32:
+ case NEON::BI__builtin_neon_vcvtd_s64_f64: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
+ llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
+ llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
+ Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
+ if (usgn)
+ return Builder.CreateFPToUI(Ops[0], InTy);
+ return Builder.CreateFPToSI(Ops[0], InTy);
+ }
+ case NEON::BI__builtin_neon_vcvts_f32_u32:
+ case NEON::BI__builtin_neon_vcvtd_f64_u64:
+ usgn = true;
+ // FALL THROUGH
+ case NEON::BI__builtin_neon_vcvts_f32_s32:
+ case NEON::BI__builtin_neon_vcvtd_f64_s64: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
+ llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
+ llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
+ Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
+ if (usgn)
+ return Builder.CreateUIToFP(Ops[0], FTy);
+ return Builder.CreateSIToFP(Ops[0], FTy);
+ }
+ case NEON::BI__builtin_neon_vpaddd_s64: {
+ llvm::Type *Ty =
+ llvm::VectorType::get(llvm::Type::getInt64Ty(getLLVMContext()), 2);
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+    // The vector is v2i64, so make sure it's bitcast to that.
+ Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
+ llvm::Value *Idx0 = llvm::ConstantInt::get(Int32Ty, 0);
+ llvm::Value *Idx1 = llvm::ConstantInt::get(Int32Ty, 1);
+ Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
+ Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
+    // Pairwise addition of a v2i64 into a scalar i64.
+ return Builder.CreateAdd(Op0, Op1, "vpaddd");
+ }
+ case NEON::BI__builtin_neon_vpaddd_f64: {
+ llvm::Type *Ty =
+ llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2);
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ // The vector is v2f64, so make sure it's bitcast to that.
+ Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
+ llvm::Value *Idx0 = llvm::ConstantInt::get(Int32Ty, 0);
+ llvm::Value *Idx1 = llvm::ConstantInt::get(Int32Ty, 1);
+ Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
+ Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
+ // Pairwise addition of a v2f64 into a scalar f64.
+ return Builder.CreateFAdd(Op0, Op1, "vpaddd");
+ }
+ case NEON::BI__builtin_neon_vpadds_f32: {
+ llvm::Type *Ty =
+ llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2);
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ // The vector is v2f32, so make sure it's bitcast to that.
+ Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
+ llvm::Value *Idx0 = llvm::ConstantInt::get(Int32Ty, 0);
+ llvm::Value *Idx1 = llvm::ConstantInt::get(Int32Ty, 1);
+ Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
+ Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
+ // Pairwise addition of a v2f32 into a scalar f32.
+ return Builder.CreateFAdd(Op0, Op1, "vpaddd");
+ }
+ case NEON::BI__builtin_neon_vceqzd_s64:
+ case NEON::BI__builtin_neon_vceqzd_f64:
+ case NEON::BI__builtin_neon_vceqzs_f32:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OEQ,
+ ICmpInst::ICMP_EQ, "vceqz");
+ case NEON::BI__builtin_neon_vcgezd_s64:
+ case NEON::BI__builtin_neon_vcgezd_f64:
+ case NEON::BI__builtin_neon_vcgezs_f32:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGE,
+ ICmpInst::ICMP_SGE, "vcgez");
+ case NEON::BI__builtin_neon_vclezd_s64:
+ case NEON::BI__builtin_neon_vclezd_f64:
+ case NEON::BI__builtin_neon_vclezs_f32:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLE,
+ ICmpInst::ICMP_SLE, "vclez");
+ case NEON::BI__builtin_neon_vcgtzd_s64:
+ case NEON::BI__builtin_neon_vcgtzd_f64:
+ case NEON::BI__builtin_neon_vcgtzs_f32:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGT,
+ ICmpInst::ICMP_SGT, "vcgtz");
+ case NEON::BI__builtin_neon_vcltzd_s64:
+ case NEON::BI__builtin_neon_vcltzd_f64:
+ case NEON::BI__builtin_neon_vcltzs_f32:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLT,
+ ICmpInst::ICMP_SLT, "vcltz");
+
+ case NEON::BI__builtin_neon_vceqzd_u64: {
+ llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext());
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[0] = Builder.CreateICmp(llvm::ICmpInst::ICMP_EQ, Ops[0],
+ llvm::Constant::getNullValue(Ty));
+ return Builder.CreateSExt(Ops[0], Ty, "vceqzd");
+ }
+ case NEON::BI__builtin_neon_vceqd_f64:
+ case NEON::BI__builtin_neon_vcled_f64:
+ case NEON::BI__builtin_neon_vcltd_f64:
+ case NEON::BI__builtin_neon_vcged_f64:
+ case NEON::BI__builtin_neon_vcgtd_f64: {
+ llvm::CmpInst::Predicate P;
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
+ case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
+ case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
+ case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
+ case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
+ }
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
+ Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
+ return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
+ }
+ case NEON::BI__builtin_neon_vceqs_f32:
+ case NEON::BI__builtin_neon_vcles_f32:
+ case NEON::BI__builtin_neon_vclts_f32:
+ case NEON::BI__builtin_neon_vcges_f32:
+ case NEON::BI__builtin_neon_vcgts_f32: {
+ llvm::CmpInst::Predicate P;
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
+ case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
+ case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
+ case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
+ case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
+ }
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
+ Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
+ return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
+ }
+ case NEON::BI__builtin_neon_vceqd_s64:
+ case NEON::BI__builtin_neon_vceqd_u64:
+ case NEON::BI__builtin_neon_vcgtd_s64:
+ case NEON::BI__builtin_neon_vcgtd_u64:
+ case NEON::BI__builtin_neon_vcltd_s64:
+ case NEON::BI__builtin_neon_vcltd_u64:
+ case NEON::BI__builtin_neon_vcged_u64:
+ case NEON::BI__builtin_neon_vcged_s64:
+ case NEON::BI__builtin_neon_vcled_u64:
+ case NEON::BI__builtin_neon_vcled_s64: {
+ llvm::CmpInst::Predicate P;
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vceqd_s64:
+ case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
+ case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
+ case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
+ case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
+ case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
+ case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
+ case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
+ case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
+ case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
+ }
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+ Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
+ return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
+ }
+ case NEON::BI__builtin_neon_vtstd_s64:
+ case NEON::BI__builtin_neon_vtstd_u64: {
+ llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext());
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
- ConstantAggregateZero::get(Ty));
- return Builder.CreateSExt(Ops[0], Ty, "vtst");
+ llvm::Constant::getNullValue(Ty));
+ return Builder.CreateSExt(Ops[0], Ty, "vtstd");
}
- case ARM::BI__builtin_neon_vtrn_v:
- case ARM::BI__builtin_neon_vtrnq_v: {
+ case NEON::BI__builtin_neon_vset_lane_i8:
+ case NEON::BI__builtin_neon_vset_lane_i16:
+ case NEON::BI__builtin_neon_vset_lane_i32:
+ case NEON::BI__builtin_neon_vset_lane_i64:
+ case NEON::BI__builtin_neon_vset_lane_f32:
+ case NEON::BI__builtin_neon_vsetq_lane_i8:
+ case NEON::BI__builtin_neon_vsetq_lane_i16:
+ case NEON::BI__builtin_neon_vsetq_lane_i32:
+ case NEON::BI__builtin_neon_vsetq_lane_i64:
+ case NEON::BI__builtin_neon_vsetq_lane_f32:
+ Ops.push_back(EmitScalarExpr(E->getArg(2)));
+ return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
+ case NEON::BI__builtin_neon_vset_lane_f64:
+ // The vector type needs a cast for the v1f64 variant.
+ Ops[1] = Builder.CreateBitCast(Ops[1],
+ llvm::VectorType::get(DoubleTy, 1));
+ Ops.push_back(EmitScalarExpr(E->getArg(2)));
+ return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
+ case NEON::BI__builtin_neon_vsetq_lane_f64:
+ // The vector type needs a cast for the v2f64 variant.
+ Ops[1] = Builder.CreateBitCast(Ops[1],
+ llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2));
+ Ops.push_back(EmitScalarExpr(E->getArg(2)));
+ return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
+
+ case NEON::BI__builtin_neon_vget_lane_i8:
+ case NEON::BI__builtin_neon_vdupb_lane_i8:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vget_lane");
+ case NEON::BI__builtin_neon_vgetq_lane_i8:
+ case NEON::BI__builtin_neon_vdupb_laneq_i8:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vgetq_lane");
+ case NEON::BI__builtin_neon_vget_lane_i16:
+ case NEON::BI__builtin_neon_vduph_lane_i16:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vget_lane");
+ case NEON::BI__builtin_neon_vgetq_lane_i16:
+ case NEON::BI__builtin_neon_vduph_laneq_i16:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vgetq_lane");
+ case NEON::BI__builtin_neon_vget_lane_i32:
+ case NEON::BI__builtin_neon_vdups_lane_i32:
+ Ops[0] = Builder.CreateBitCast(
+ Ops[0],
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 2));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vget_lane");
+ case NEON::BI__builtin_neon_vdups_lane_f32:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vdups_lane");
+ case NEON::BI__builtin_neon_vgetq_lane_i32:
+ case NEON::BI__builtin_neon_vdups_laneq_i32:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 4));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vgetq_lane");
+ case NEON::BI__builtin_neon_vget_lane_i64:
+ case NEON::BI__builtin_neon_vdupd_lane_i64:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 1));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vget_lane");
+ case NEON::BI__builtin_neon_vdupd_lane_f64:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vdupd_lane");
+ case NEON::BI__builtin_neon_vgetq_lane_i64:
+ case NEON::BI__builtin_neon_vdupd_laneq_i64:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 2));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vgetq_lane");
+ case NEON::BI__builtin_neon_vget_lane_f32:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vget_lane");
+ case NEON::BI__builtin_neon_vget_lane_f64:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vget_lane");
+ case NEON::BI__builtin_neon_vgetq_lane_f32:
+ case NEON::BI__builtin_neon_vdups_laneq_f32:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vgetq_lane");
+ case NEON::BI__builtin_neon_vgetq_lane_f64:
+ case NEON::BI__builtin_neon_vdupd_laneq_f64:
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2));
+ return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
+ "vgetq_lane");
+ case NEON::BI__builtin_neon_vaddd_s64:
+ case NEON::BI__builtin_neon_vaddd_u64:
+ return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
+ case NEON::BI__builtin_neon_vsubd_s64:
+ case NEON::BI__builtin_neon_vsubd_u64:
+ return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
+ case NEON::BI__builtin_neon_vqdmlalh_s16:
+ case NEON::BI__builtin_neon_vqdmlslh_s16: {
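+    // There is no scalar saturating-doubling multiply for i16, so widen the
+    // two i16 operands into lane 0 of a v4i16, do a vector sqdmull to get a
+    // v4i32, and pull the i32 product back out of lane 0 before the
+    // saturating accumulate.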
+ SmallVector<Value *, 2> ProductOps;
+ ProductOps.push_back(vectorWrapScalar16(Ops[1]));
+ ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
+ llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
+ Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqdmull, VTy),
+ ProductOps, "vqdmlXl");
+ Constant *CI = ConstantInt::get(Int32Ty, 0);
+ Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
+
+ unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
+ ? Intrinsic::arm64_neon_sqadd
+ : Intrinsic::arm64_neon_sqsub;
+ return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
+ }
+ case NEON::BI__builtin_neon_vqshlud_n_s64: {
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
+ llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqshlu, VTy),
+ Ops, "vqshlu_n");
+ return Builder.CreateBitCast(Ops[0], Int64Ty);
+ }
+ case NEON::BI__builtin_neon_vqshld_n_u64:
+ case NEON::BI__builtin_neon_vqshld_n_s64: {
+ unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
+ ? Intrinsic::arm64_neon_uqshl
+ : Intrinsic::arm64_neon_sqshl;
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
+ llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, "vqshl_n");
+ return Builder.CreateBitCast(Ops[0], Int64Ty);
+ }
+ case NEON::BI__builtin_neon_vrshrd_n_u64:
+ case NEON::BI__builtin_neon_vrshrd_n_s64: {
+ unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
+ ? Intrinsic::arm64_neon_urshl
+ : Intrinsic::arm64_neon_srshl;
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, "vrshr_n", 1, true);
+ return Builder.CreateBitCast(Ops[0], Int64Ty);
+ }
+ case NEON::BI__builtin_neon_vrsrad_n_u64:
+ case NEON::BI__builtin_neon_vrsrad_n_s64: {
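+    // There is no rounding right-shift intrinsic on arm64; use the rounding
+    // left shift with a negated shift amount instead.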
+ unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
+ ? Intrinsic::arm64_neon_urshl
+ : Intrinsic::arm64_neon_srshl;
+ Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+ Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
+ Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1],
+ Builder.CreateSExt(Ops[2], Int64Ty));
+ return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
+ }
+ case NEON::BI__builtin_neon_vshld_n_s64:
+ case NEON::BI__builtin_neon_vshld_n_u64: {
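+    // Shifts by an amount >= the bit width are undefined in LLVM IR, so the
+    // immediate is clamped to 63 here and in the vshrd/vsrad cases below.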
+ llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateShl(
+ Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
+ Amt->getZExtValue())),
+ "vshr_n");
+ }
+ case NEON::BI__builtin_neon_vshrd_n_s64: {
+ llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateAShr(
+ Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
+ Amt->getZExtValue())),
+ "vshr_n");
+ }
+ case NEON::BI__builtin_neon_vshrd_n_u64: {
+ llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateLShr(
+ Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
+ Amt->getZExtValue())),
+ "vshr_n");
+ }
+ case NEON::BI__builtin_neon_vsrad_n_s64: {
+ llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
+ Ops[1] = Builder.CreateAShr(
+ Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
+ Amt->getZExtValue())),
+ "vshr_n");
+ return Builder.CreateAdd(Ops[0], Ops[1]);
+ }
+ case NEON::BI__builtin_neon_vsrad_n_u64: {
+ llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
+ Ops[1] = Builder.CreateLShr(
+ Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
+ Amt->getZExtValue())),
+ "vshr_n");
+ return Builder.CreateAdd(Ops[0], Ops[1]);
+ }
+ case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
+ case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
+ case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
+ case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
+ Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
+ "lane");
+ SmallVector<Value *, 2> ProductOps;
+ ProductOps.push_back(vectorWrapScalar16(Ops[1]));
+ ProductOps.push_back(vectorWrapScalar16(Ops[2]));
+ llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
+ Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqdmull, VTy),
+ ProductOps, "vqdmlXl");
+ Constant *CI = ConstantInt::get(Int32Ty, 0);
+ Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
+ Ops.pop_back();
+
+ unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
+ BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
+ ? Intrinsic::arm64_neon_sqadd
+ : Intrinsic::arm64_neon_sqsub;
+ return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
+ }
+ case NEON::BI__builtin_neon_vqdmlals_s32:
+ case NEON::BI__builtin_neon_vqdmlsls_s32: {
+ SmallVector<Value *, 2> ProductOps;
+ ProductOps.push_back(Ops[1]);
+ ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
+ Ops[1] =
+ EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqdmulls_scalar),
+ ProductOps, "vqdmlXl");
+
+ unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
+ ? Intrinsic::arm64_neon_sqadd
+ : Intrinsic::arm64_neon_sqsub;
+ return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
+ }
+ case NEON::BI__builtin_neon_vqdmlals_lane_s32:
+ case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
+ case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
+ case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
+ Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
+ "lane");
+ SmallVector<Value *, 2> ProductOps;
+ ProductOps.push_back(Ops[1]);
+ ProductOps.push_back(Ops[2]);
+ Ops[1] =
+ EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqdmulls_scalar),
+ ProductOps, "vqdmlXl");
+ Ops.pop_back();
+
+ unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
+ BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
+ ? Intrinsic::arm64_neon_sqadd
+ : Intrinsic::arm64_neon_sqsub;
+ return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
+ }
+ }
+
+ llvm::VectorType *VTy = GetNeonType(this, Type);
+ llvm::Type *Ty = VTy;
+ if (!Ty)
+ return 0;
+
+ // Not all intrinsics handled by the common case work for ARM64 yet, so only
+ // defer to common code if it's been added to our special map.
+ Builtin = findNeonIntrinsicInMap(ARM64SIMDIntrinsicMap, BuiltinID,
+ ARM64SIMDIntrinsicsProvenSorted);
+
+ if (Builtin)
+ return EmitCommonNeonBuiltinExpr(
+ Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
+ Builtin->NameHint, Builtin->TypeModifier, E, Ops, 0);
+
+ if (Value *V = EmitARM64TblBuiltinExpr(*this, BuiltinID, E, Ops))
+ return V;
+
+ unsigned Int;
+ switch (BuiltinID) {
+ default: return 0;
+ case NEON::BI__builtin_neon_vbsl_v:
+ case NEON::BI__builtin_neon_vbslq_v: {
+ llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
+ Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
+ Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
+ Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
+
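+    // Expand the bit select as (Ops[0] & Ops[1]) | (~Ops[0] & Ops[2]) on the
+    // integer-typed vectors.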
+ Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
+ Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
+ Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
+ return Builder.CreateBitCast(Ops[0], Ty);
+ }
+ case NEON::BI__builtin_neon_vfma_lane_v:
+ case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
+ // The ARM builtins (and instructions) have the addend as the first
+ // operand, but the 'fma' intrinsics have it last. Swap it around here.
+ Value *Addend = Ops[0];
+ Value *Multiplicand = Ops[1];
+ Value *LaneSource = Ops[2];
+ Ops[0] = Multiplicand;
+ Ops[1] = LaneSource;
+ Ops[2] = Addend;
+
+ // Now adjust things to handle the lane access.
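+    // The quad variant still takes its lane operand from a 64-bit vector, so
+    // the shuffle source has half as many elements as the result.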
+ llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
+ llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
+ VTy;
+ llvm::Constant *cst = cast<Constant>(Ops[3]);
+ Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
+ Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
+ Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
+
+ Ops.pop_back();
+ Int = Intrinsic::fma;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
+ }
+ case NEON::BI__builtin_neon_vfma_laneq_v: {
+ llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+ // v1f64 fma should be mapped to Neon scalar f64 fma
+ if (VTy && VTy->getElementType() == DoubleTy) {
+ Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
+ llvm::Type *VTy = GetNeonType(this,
+ NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+ Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+ Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
+ Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+ return Builder.CreateBitCast(Result, Ty);
+ }
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+
+ llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
+ VTy->getNumElements() * 2);
+ Ops[2] = Builder.CreateBitCast(Ops[2], STy);
+ Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
+ cast<ConstantInt>(Ops[3]));
+ Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
+
+ return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vfmaq_laneq_v: {
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
+ return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vfmas_lane_f32:
+ case NEON::BI__builtin_neon_vfmas_laneq_f32:
+ case NEON::BI__builtin_neon_vfmad_lane_f64:
+ case NEON::BI__builtin_neon_vfmad_laneq_f64: {
+ Ops.push_back(EmitScalarExpr(E->getArg(3)));
+ llvm::Type *Ty = ConvertType(E->getCallReturnType());
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
+ return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vfms_v:
+ case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types
+ // FIXME: probably remove when we no longer support aarch64_simd.h
+ // (arm_neon.h delegates to vfma).
+
+ // The ARM builtins (and instructions) have the addend as the first
+ // operand, but the 'fma' intrinsics have it last. Swap it around here.
+ Value *Subtrahend = Ops[0];
+ Value *Multiplicand = Ops[2];
+ Ops[0] = Multiplicand;
+ Ops[2] = Subtrahend;
+ Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+ Ops[1] = Builder.CreateFNeg(Ops[1]);
+ Int = Intrinsic::fma;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls");
+ }
+ case NEON::BI__builtin_neon_vmull_v:
+ // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
+ Int = usgn ? Intrinsic::arm64_neon_umull : Intrinsic::arm64_neon_smull;
+ if (Type.isPoly()) Int = Intrinsic::arm64_neon_pmull;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
+ case NEON::BI__builtin_neon_vmax_v:
+ case NEON::BI__builtin_neon_vmaxq_v:
+ // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
+ Int = usgn ? Intrinsic::arm64_neon_umax : Intrinsic::arm64_neon_smax;
+ if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fmax;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
+ case NEON::BI__builtin_neon_vmin_v:
+ case NEON::BI__builtin_neon_vminq_v:
+ // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
+ Int = usgn ? Intrinsic::arm64_neon_umin : Intrinsic::arm64_neon_smin;
+ if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fmin;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
+ case NEON::BI__builtin_neon_vabd_v:
+ case NEON::BI__builtin_neon_vabdq_v:
+ // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
+ Int = usgn ? Intrinsic::arm64_neon_uabd : Intrinsic::arm64_neon_sabd;
+ if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fabd;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
+ case NEON::BI__builtin_neon_vpadal_v:
+ case NEON::BI__builtin_neon_vpadalq_v: {
+ unsigned ArgElts = VTy->getNumElements();
+ llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
+ unsigned BitWidth = EltTy->getBitWidth();
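+    // [us]addlp pairwise-widens a vector with twice as many elements of half
+    // the width; the accumulator is added back in with an ordinary add below.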
+ llvm::Type *ArgTy = llvm::VectorType::get(
+ llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
+ llvm::Type* Tys[2] = { VTy, ArgTy };
+ Int = usgn ? Intrinsic::arm64_neon_uaddlp : Intrinsic::arm64_neon_saddlp;
+ SmallVector<llvm::Value*, 1> TmpOps;
+ TmpOps.push_back(Ops[1]);
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
+ llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
+ return Builder.CreateAdd(tmp, addend);
+ }
+ case NEON::BI__builtin_neon_vpmin_v:
+ case NEON::BI__builtin_neon_vpminq_v:
+ // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
+ Int = usgn ? Intrinsic::arm64_neon_uminp : Intrinsic::arm64_neon_sminp;
+ if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fminp;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
+ case NEON::BI__builtin_neon_vpmax_v:
+ case NEON::BI__builtin_neon_vpmaxq_v:
+ // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
+ Int = usgn ? Intrinsic::arm64_neon_umaxp : Intrinsic::arm64_neon_smaxp;
+ if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fmaxp;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
+ case NEON::BI__builtin_neon_vminnm_v:
+ case NEON::BI__builtin_neon_vminnmq_v:
+ Int = Intrinsic::arm64_neon_fminnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
+ case NEON::BI__builtin_neon_vmaxnm_v:
+ case NEON::BI__builtin_neon_vmaxnmq_v:
+ Int = Intrinsic::arm64_neon_fmaxnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
+ case NEON::BI__builtin_neon_vrecpss_f32: {
+ llvm::Type *f32Type = llvm::Type::getFloatTy(getLLVMContext());
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_frecps, f32Type),
+ Ops, "vrecps");
+ }
+ case NEON::BI__builtin_neon_vrecpsd_f64: {
+ llvm::Type *f64Type = llvm::Type::getDoubleTy(getLLVMContext());
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_frecps, f64Type),
+ Ops, "vrecps");
+ }
+ case NEON::BI__builtin_neon_vrshr_n_v:
+ case NEON::BI__builtin_neon_vrshrq_n_v:
+ // FIXME: this can be shared with 32-bit ARM, but not AArch64 at the
+ // moment. After the final merge it should be added to
+ // EmitCommonNeonBuiltinExpr.
+ Int = usgn ? Intrinsic::arm64_neon_urshl : Intrinsic::arm64_neon_srshl;
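+    // Operand 1 is the shift immediate; passing rightshift=true makes
+    // EmitNeonCall negate it, turning the [us]rshl left shift into a rounding
+    // right shift.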
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
+ case NEON::BI__builtin_neon_vqshlu_n_v:
+ case NEON::BI__builtin_neon_vqshluq_n_v:
+    // FIXME: AArch64 and ARM use different intrinsics for this, but they are
+ // essentially compatible. It should be in EmitCommonNeonBuiltinExpr after
+ // the final merge.
+ Int = Intrinsic::arm64_neon_sqshlu;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 1, false);
+ case NEON::BI__builtin_neon_vqshrun_n_v:
+ // FIXME: as above
+ Int = Intrinsic::arm64_neon_sqshrun;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
+ case NEON::BI__builtin_neon_vqrshrun_n_v:
+ // FIXME: and again.
+ Int = Intrinsic::arm64_neon_sqrshrun;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
+ case NEON::BI__builtin_neon_vqshrn_n_v:
+ // FIXME: guess
+ Int = usgn ? Intrinsic::arm64_neon_uqshrn : Intrinsic::arm64_neon_sqshrn;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
+ case NEON::BI__builtin_neon_vrshrn_n_v:
+ // FIXME: there might be a pattern here.
+ Int = Intrinsic::arm64_neon_rshrn;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
+ case NEON::BI__builtin_neon_vqrshrn_n_v:
+ // FIXME: another one
+ Int = usgn ? Intrinsic::arm64_neon_uqrshrn : Intrinsic::arm64_neon_sqrshrn;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
+ case NEON::BI__builtin_neon_vrnda_v:
+ case NEON::BI__builtin_neon_vrndaq_v: {
+ Int = Intrinsic::round;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
+ }
+ case NEON::BI__builtin_neon_vrndi_v:
+ case NEON::BI__builtin_neon_vrndiq_v: {
+ Int = Intrinsic::nearbyint;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
+ }
+ case NEON::BI__builtin_neon_vrndm_v:
+ case NEON::BI__builtin_neon_vrndmq_v: {
+ Int = Intrinsic::floor;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
+ }
+ case NEON::BI__builtin_neon_vrndn_v:
+ case NEON::BI__builtin_neon_vrndnq_v: {
+ Int = Intrinsic::arm64_neon_frintn;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
+ }
+ case NEON::BI__builtin_neon_vrndp_v:
+ case NEON::BI__builtin_neon_vrndpq_v: {
+ Int = Intrinsic::ceil;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
+ }
+ case NEON::BI__builtin_neon_vrndx_v:
+ case NEON::BI__builtin_neon_vrndxq_v: {
+ Int = Intrinsic::rint;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
+ }
+ case NEON::BI__builtin_neon_vrnd_v:
+ case NEON::BI__builtin_neon_vrndq_v: {
+ Int = Intrinsic::trunc;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
+ }
+ case NEON::BI__builtin_neon_vceqz_v:
+ case NEON::BI__builtin_neon_vceqzq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
+ ICmpInst::ICMP_EQ, "vceqz");
+ case NEON::BI__builtin_neon_vcgez_v:
+ case NEON::BI__builtin_neon_vcgezq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
+ ICmpInst::ICMP_SGE, "vcgez");
+ case NEON::BI__builtin_neon_vclez_v:
+ case NEON::BI__builtin_neon_vclezq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
+ ICmpInst::ICMP_SLE, "vclez");
+ case NEON::BI__builtin_neon_vcgtz_v:
+ case NEON::BI__builtin_neon_vcgtzq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
+ ICmpInst::ICMP_SGT, "vcgtz");
+ case NEON::BI__builtin_neon_vcltz_v:
+ case NEON::BI__builtin_neon_vcltzq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
+ ICmpInst::ICMP_SLT, "vcltz");
+ case NEON::BI__builtin_neon_vcvt_f64_v:
+ case NEON::BI__builtin_neon_vcvtq_f64_v:
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
+ return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
+ : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
+ case NEON::BI__builtin_neon_vcvt_f64_f32: {
+ assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
+ "unexpected vcvt_f64_f32 builtin");
+ NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
+ Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
+
+ return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
+ }
+ case NEON::BI__builtin_neon_vcvt_f32_f64: {
+ assert(Type.getEltType() == NeonTypeFlags::Float32 &&
+ "unexpected vcvt_f32_f64 builtin");
+ NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
+ Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
+
+ return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
+ }
+ case NEON::BI__builtin_neon_vcvt_s32_v:
+ case NEON::BI__builtin_neon_vcvt_u32_v:
+ case NEON::BI__builtin_neon_vcvt_s64_v:
+ case NEON::BI__builtin_neon_vcvt_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_s32_v:
+ case NEON::BI__builtin_neon_vcvtq_u32_v:
+ case NEON::BI__builtin_neon_vcvtq_s64_v:
+ case NEON::BI__builtin_neon_vcvtq_u64_v: {
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
+ llvm::Type *InTy =
+ GetNeonType(this,
+ NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32, false, quad));
+ Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
+ if (usgn)
+ return Builder.CreateFPToUI(Ops[0], Ty);
+ return Builder.CreateFPToSI(Ops[0], Ty);
+ }
+ case NEON::BI__builtin_neon_vcvta_s32_v:
+ case NEON::BI__builtin_neon_vcvtaq_s32_v:
+ case NEON::BI__builtin_neon_vcvta_u32_v:
+ case NEON::BI__builtin_neon_vcvtaq_u32_v:
+ case NEON::BI__builtin_neon_vcvta_s64_v:
+ case NEON::BI__builtin_neon_vcvtaq_s64_v:
+ case NEON::BI__builtin_neon_vcvta_u64_v:
+ case NEON::BI__builtin_neon_vcvtaq_u64_v: {
+ Int = usgn ? Intrinsic::arm64_neon_fcvtau : Intrinsic::arm64_neon_fcvtas;
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
+ llvm::Type *InTy =
+ GetNeonType(this,
+ NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, InTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
+ }
+ case NEON::BI__builtin_neon_vcvtm_s32_v:
+ case NEON::BI__builtin_neon_vcvtmq_s32_v:
+ case NEON::BI__builtin_neon_vcvtm_u32_v:
+ case NEON::BI__builtin_neon_vcvtmq_u32_v:
+ case NEON::BI__builtin_neon_vcvtm_s64_v:
+ case NEON::BI__builtin_neon_vcvtmq_s64_v:
+ case NEON::BI__builtin_neon_vcvtm_u64_v:
+ case NEON::BI__builtin_neon_vcvtmq_u64_v: {
+ Int = usgn ? Intrinsic::arm64_neon_fcvtmu : Intrinsic::arm64_neon_fcvtms;
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
+ llvm::Type *InTy =
+ GetNeonType(this,
+ NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, InTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
+ }
+ case NEON::BI__builtin_neon_vcvtn_s32_v:
+ case NEON::BI__builtin_neon_vcvtnq_s32_v:
+ case NEON::BI__builtin_neon_vcvtn_u32_v:
+ case NEON::BI__builtin_neon_vcvtnq_u32_v:
+ case NEON::BI__builtin_neon_vcvtn_s64_v:
+ case NEON::BI__builtin_neon_vcvtnq_s64_v:
+ case NEON::BI__builtin_neon_vcvtn_u64_v:
+ case NEON::BI__builtin_neon_vcvtnq_u64_v: {
+ Int = usgn ? Intrinsic::arm64_neon_fcvtnu : Intrinsic::arm64_neon_fcvtns;
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
+ llvm::Type *InTy =
+ GetNeonType(this,
+ NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, InTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
+ }
+ case NEON::BI__builtin_neon_vcvtp_s32_v:
+ case NEON::BI__builtin_neon_vcvtpq_s32_v:
+ case NEON::BI__builtin_neon_vcvtp_u32_v:
+ case NEON::BI__builtin_neon_vcvtpq_u32_v:
+ case NEON::BI__builtin_neon_vcvtp_s64_v:
+ case NEON::BI__builtin_neon_vcvtpq_s64_v:
+ case NEON::BI__builtin_neon_vcvtp_u64_v:
+ case NEON::BI__builtin_neon_vcvtpq_u64_v: {
+ Int = usgn ? Intrinsic::arm64_neon_fcvtpu : Intrinsic::arm64_neon_fcvtps;
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
+ llvm::Type *InTy =
+ GetNeonType(this,
+ NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, InTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
+ }
+ case NEON::BI__builtin_neon_vmulx_v:
+ case NEON::BI__builtin_neon_vmulxq_v: {
+ Int = Intrinsic::arm64_neon_fmulx;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
+ }
+ case NEON::BI__builtin_neon_vmul_lane_v:
+ case NEON::BI__builtin_neon_vmul_laneq_v: {
+ // v1f64 vmul_lane should be mapped to Neon scalar mul lane
+ bool Quad = false;
+ if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
+ Quad = true;
+ Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
+ llvm::Type *VTy = GetNeonType(this,
+ NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
+ Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+ Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
+ Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
+ return Builder.CreateBitCast(Result, Ty);
+ }
+ case NEON::BI__builtin_neon_vnegd_s64:
+ return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
+ case NEON::BI__builtin_neon_vpmaxnm_v:
+ case NEON::BI__builtin_neon_vpmaxnmq_v: {
+ Int = Intrinsic::arm64_neon_fmaxnmp;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
+ }
+ case NEON::BI__builtin_neon_vpminnm_v:
+ case NEON::BI__builtin_neon_vpminnmq_v: {
+ Int = Intrinsic::arm64_neon_fminnmp;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
+ }
+ case NEON::BI__builtin_neon_vsqrt_v:
+ case NEON::BI__builtin_neon_vsqrtq_v: {
+ Int = Intrinsic::sqrt;
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
+ }
+ case NEON::BI__builtin_neon_vrbit_v:
+ case NEON::BI__builtin_neon_vrbitq_v: {
+ Int = Intrinsic::arm64_neon_rbit;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
+ }
+ case NEON::BI__builtin_neon_vaddv_u8:
+ // FIXME: These are handled by the AArch64 scalar code.
+ usgn = true;
+ // FALLTHROUGH
+ case NEON::BI__builtin_neon_vaddv_s8: {
+ Int = usgn ? Intrinsic::arm64_neon_uaddv : Intrinsic::arm64_neon_saddv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vaddv_u16:
+ usgn = true;
+ // FALLTHROUGH
+ case NEON::BI__builtin_neon_vaddv_s16: {
+ Int = usgn ? Intrinsic::arm64_neon_uaddv : Intrinsic::arm64_neon_saddv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vaddvq_u8:
+ usgn = true;
+ // FALLTHROUGH
+ case NEON::BI__builtin_neon_vaddvq_s8: {
+ Int = usgn ? Intrinsic::arm64_neon_uaddv : Intrinsic::arm64_neon_saddv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vaddvq_u16:
+ usgn = true;
+ // FALLTHROUGH
+ case NEON::BI__builtin_neon_vaddvq_s16: {
+ Int = usgn ? Intrinsic::arm64_neon_uaddv : Intrinsic::arm64_neon_saddv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vmaxv_u8: {
+ Int = Intrinsic::arm64_neon_umaxv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vmaxv_u16: {
+ Int = Intrinsic::arm64_neon_umaxv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vmaxvq_u8: {
+ Int = Intrinsic::arm64_neon_umaxv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vmaxvq_u16: {
+ Int = Intrinsic::arm64_neon_umaxv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vmaxv_s8: {
+ Int = Intrinsic::arm64_neon_smaxv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vmaxv_s16: {
+ Int = Intrinsic::arm64_neon_smaxv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vmaxvq_s8: {
+ Int = Intrinsic::arm64_neon_smaxv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vmaxvq_s16: {
+ Int = Intrinsic::arm64_neon_smaxv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vminv_u8: {
+ Int = Intrinsic::arm64_neon_uminv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vminv_u16: {
+ Int = Intrinsic::arm64_neon_uminv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vminvq_u8: {
+ Int = Intrinsic::arm64_neon_uminv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vminvq_u16: {
+ Int = Intrinsic::arm64_neon_uminv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vminv_s8: {
+ Int = Intrinsic::arm64_neon_sminv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vminv_s16: {
+ Int = Intrinsic::arm64_neon_sminv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vminvq_s8: {
+ Int = Intrinsic::arm64_neon_sminv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 8));
+ }
+ case NEON::BI__builtin_neon_vminvq_s16: {
+ Int = Intrinsic::arm64_neon_sminv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vmul_n_f64: {
+ Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
+ Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
+ return Builder.CreateFMul(Ops[0], RHS);
+ }
+ case NEON::BI__builtin_neon_vaddlv_u8: {
+ Int = Intrinsic::arm64_neon_uaddlv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vaddlv_u16: {
+ Int = Intrinsic::arm64_neon_uaddlv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
+ }
+ case NEON::BI__builtin_neon_vaddlvq_u8: {
+ Int = Intrinsic::arm64_neon_uaddlv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vaddlvq_u16: {
+ Int = Intrinsic::arm64_neon_uaddlv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
+ }
+ case NEON::BI__builtin_neon_vaddlv_s8: {
+ Int = Intrinsic::arm64_neon_saddlv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vaddlv_s16: {
+ Int = Intrinsic::arm64_neon_saddlv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
+ }
+ case NEON::BI__builtin_neon_vaddlvq_s8: {
+ Int = Intrinsic::arm64_neon_saddlv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
+ return Builder.CreateTrunc(Ops[0],
+ llvm::IntegerType::get(getLLVMContext(), 16));
+ }
+ case NEON::BI__builtin_neon_vaddlvq_s16: {
+ Int = Intrinsic::arm64_neon_saddlv;
+ Ty = llvm::IntegerType::get(getLLVMContext(), 32);
+ VTy =
+ llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
+ }
+ case NEON::BI__builtin_neon_vsri_n_v:
+ case NEON::BI__builtin_neon_vsriq_n_v: {
+ Int = Intrinsic::arm64_neon_vsri;
+ llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
+ return EmitNeonCall(Intrin, Ops, "vsri_n");
+ }
+ case NEON::BI__builtin_neon_vsli_n_v:
+ case NEON::BI__builtin_neon_vsliq_n_v: {
+ Int = Intrinsic::arm64_neon_vsli;
+ llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
+ return EmitNeonCall(Intrin, Ops, "vsli_n");
+ }
+ case NEON::BI__builtin_neon_vsra_n_v:
+ case NEON::BI__builtin_neon_vsraq_n_v:
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
+ return Builder.CreateAdd(Ops[0], Ops[1]);
+ case NEON::BI__builtin_neon_vrsra_n_v:
+ case NEON::BI__builtin_neon_vrsraq_n_v: {
+ Int = usgn ? Intrinsic::arm64_neon_urshl : Intrinsic::arm64_neon_srshl;
+ SmallVector<llvm::Value*,2> TmpOps;
+ TmpOps.push_back(Ops[1]);
+ TmpOps.push_back(Ops[2]);
+ Function* F = CGM.getIntrinsic(Int, Ty);
+ llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
+ Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
+ return Builder.CreateAdd(Ops[0], tmp);
+ }
+ // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
+ // of an Align parameter here.
+ case NEON::BI__builtin_neon_vld1_x2_v:
+ case NEON::BI__builtin_neon_vld1q_x2_v:
+ case NEON::BI__builtin_neon_vld1_x3_v:
+ case NEON::BI__builtin_neon_vld1q_x3_v:
+ case NEON::BI__builtin_neon_vld1_x4_v:
+ case NEON::BI__builtin_neon_vld1q_x4_v: {
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
+ Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
+ llvm::Type *Tys[2] = { VTy, PTy };
+ unsigned Int;
+ switch (BuiltinID) {
+ case NEON::BI__builtin_neon_vld1_x2_v:
+ case NEON::BI__builtin_neon_vld1q_x2_v:
+ Int = Intrinsic::arm64_neon_ld1x2;
+ break;
+ case NEON::BI__builtin_neon_vld1_x3_v:
+ case NEON::BI__builtin_neon_vld1q_x3_v:
+ Int = Intrinsic::arm64_neon_ld1x3;
+ break;
+ case NEON::BI__builtin_neon_vld1_x4_v:
+ case NEON::BI__builtin_neon_vld1q_x4_v:
+ Int = Intrinsic::arm64_neon_ld1x4;
+ break;
+ }
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
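+    // The builtin returns its register tuple indirectly: store the loaded
+    // aggregate through the result pointer in Ops[0].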
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vst1_x2_v:
+ case NEON::BI__builtin_neon_vst1q_x2_v:
+ case NEON::BI__builtin_neon_vst1_x3_v:
+ case NEON::BI__builtin_neon_vst1q_x3_v:
+ case NEON::BI__builtin_neon_vst1_x4_v:
+ case NEON::BI__builtin_neon_vst1q_x4_v: {
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
+ llvm::Type *Tys[2] = { VTy, PTy };
+ unsigned Int;
+ switch (BuiltinID) {
+ case NEON::BI__builtin_neon_vst1_x2_v:
+ case NEON::BI__builtin_neon_vst1q_x2_v:
+ Int = Intrinsic::arm64_neon_st1x2;
+ break;
+ case NEON::BI__builtin_neon_vst1_x3_v:
+ case NEON::BI__builtin_neon_vst1q_x3_v:
+ Int = Intrinsic::arm64_neon_st1x3;
+ break;
+ case NEON::BI__builtin_neon_vst1_x4_v:
+ case NEON::BI__builtin_neon_vst1q_x4_v:
+ Int = Intrinsic::arm64_neon_st1x4;
+ break;
+ }
+ SmallVector<Value *, 4> IntOps(Ops.begin()+1, Ops.end());
+ IntOps.push_back(Ops[0]);
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), IntOps, "");
+ }
+ case NEON::BI__builtin_neon_vld1_v:
+ case NEON::BI__builtin_neon_vld1q_v:
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+ return Builder.CreateLoad(Ops[0]);
+ case NEON::BI__builtin_neon_vst1_v:
+ case NEON::BI__builtin_neon_vst1q_v:
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+ Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ case NEON::BI__builtin_neon_vld1_lane_v:
+ case NEON::BI__builtin_neon_vld1q_lane_v:
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ty = llvm::PointerType::getUnqual(VTy->getElementType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[0] = Builder.CreateLoad(Ops[0]);
+ return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
+ case NEON::BI__builtin_neon_vld1_dup_v:
+ case NEON::BI__builtin_neon_vld1q_dup_v: {
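+    // Load one element, insert it into lane 0 of an undef vector, then splat
+    // it across all lanes.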
+ Value *V = UndefValue::get(Ty);
+ Ty = llvm::PointerType::getUnqual(VTy->getElementType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[0] = Builder.CreateLoad(Ops[0]);
+ llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
+ Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
+ return EmitNeonSplat(Ops[0], CI);
+ }
+ case NEON::BI__builtin_neon_vst1_lane_v:
+ case NEON::BI__builtin_neon_vst1q_lane_v:
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
+ case NEON::BI__builtin_neon_vld2_v:
+ case NEON::BI__builtin_neon_vld2q_v: {
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
+ llvm::Type *Tys[2] = { VTy, PTy };
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld2, Tys);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld3_v:
+ case NEON::BI__builtin_neon_vld3q_v: {
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
+ llvm::Type *Tys[2] = { VTy, PTy };
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld3, Tys);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld4_v:
+ case NEON::BI__builtin_neon_vld4q_v: {
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
+ llvm::Type *Tys[2] = { VTy, PTy };
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld4, Tys);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v: {
+ llvm::Type *PTy =
+ llvm::PointerType::getUnqual(VTy->getElementType());
+ Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
+ llvm::Type *Tys[2] = { VTy, PTy };
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld2r, Tys);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v: {
+ llvm::Type *PTy =
+ llvm::PointerType::getUnqual(VTy->getElementType());
+ Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
+ llvm::Type *Tys[2] = { VTy, PTy };
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld3r, Tys);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v: {
+ llvm::Type *PTy =
+ llvm::PointerType::getUnqual(VTy->getElementType());
+ Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
+ llvm::Type *Tys[2] = { VTy, PTy };
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld4r, Tys);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
+ Ops[0] = Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld2_lane_v:
+ case NEON::BI__builtin_neon_vld2q_lane_v: {
+ llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld2lane, Tys);
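+    // The ld2lane intrinsic expects (vectors..., lane, pointer), so rotate
+    // the source pointer from Ops[1] to the end of the operand list.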
+ Ops.push_back(Ops[1]);
+ Ops.erase(Ops.begin()+1);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ops[3] = Builder.CreateZExt(Ops[3],
+ llvm::IntegerType::get(getLLVMContext(), 64));
+ Ops[1] = Builder.CreateCall(F,
+ ArrayRef<Value*>(Ops).slice(1), "vld2_lane");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld3_lane_v:
+ case NEON::BI__builtin_neon_vld3q_lane_v: {
+ llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld3lane, Tys);
+ Ops.push_back(Ops[1]);
+ Ops.erase(Ops.begin()+1);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
+ Ops[4] = Builder.CreateZExt(Ops[4],
+ llvm::IntegerType::get(getLLVMContext(), 64));
+ Ops[1] = Builder.CreateCall(F,
+ ArrayRef<Value*>(Ops).slice(1), "vld3_lane");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vld4_lane_v:
+ case NEON::BI__builtin_neon_vld4q_lane_v: {
+ llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
+ Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld4lane, Tys);
+ Ops.push_back(Ops[1]);
+ Ops.erase(Ops.begin()+1);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
+ Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
+ Ops[5] = Builder.CreateZExt(Ops[5],
+ llvm::IntegerType::get(getLLVMContext(), 64));
+ Ops[1] = Builder.CreateCall(F,
+ ArrayRef<Value*>(Ops).slice(1), "vld4_lane");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case NEON::BI__builtin_neon_vst2_v:
+ case NEON::BI__builtin_neon_vst2q_v: {
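+    // The st2 intrinsic takes the data vectors first and the pointer last,
+    // so rotate Ops[0] (the destination pointer) to the end.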
+ Ops.push_back(Ops[0]);
+ Ops.erase(Ops.begin());
+ llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st2, Tys),
+ Ops, "");
+ }
+ case NEON::BI__builtin_neon_vst2_lane_v:
+ case NEON::BI__builtin_neon_vst2q_lane_v: {
+ Ops.push_back(Ops[0]);
+ Ops.erase(Ops.begin());
+ Ops[2] = Builder.CreateZExt(Ops[2],
+ llvm::IntegerType::get(getLLVMContext(), 64));
+ llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st2lane, Tys),
+ Ops, "");
+ }
+ case NEON::BI__builtin_neon_vst3_v:
+ case NEON::BI__builtin_neon_vst3q_v: {
+ Ops.push_back(Ops[0]);
+ Ops.erase(Ops.begin());
+ llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st3, Tys),
+ Ops, "");
+ }
+ case NEON::BI__builtin_neon_vst3_lane_v:
+ case NEON::BI__builtin_neon_vst3q_lane_v: {
+ Ops.push_back(Ops[0]);
+ Ops.erase(Ops.begin());
+ Ops[3] = Builder.CreateZExt(Ops[3],
+ llvm::IntegerType::get(getLLVMContext(), 64));
+ llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st3lane, Tys),
+ Ops, "");
+ }
+ case NEON::BI__builtin_neon_vst4_v:
+ case NEON::BI__builtin_neon_vst4q_v: {
+ Ops.push_back(Ops[0]);
+ Ops.erase(Ops.begin());
+ llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st4, Tys),
+ Ops, "");
+ }
+ case NEON::BI__builtin_neon_vst4_lane_v:
+ case NEON::BI__builtin_neon_vst4q_lane_v: {
+ Ops.push_back(Ops[0]);
+ Ops.erase(Ops.begin());
+ Ops[4] = Builder.CreateZExt(Ops[4],
+ llvm::IntegerType::get(getLLVMContext(), 64));
+ llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st4lane, Tys),
+ Ops, "");
+ }
+ case NEON::BI__builtin_neon_vtrn_v:
+ case NEON::BI__builtin_neon_vtrnq_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
@@ -4865,8 +6714,8 @@
for (unsigned vi = 0; vi != 2; ++vi) {
SmallVector<Constant*, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(Builder.getInt32(i+vi));
- Indices.push_back(Builder.getInt32(i+e+vi));
+ Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
+ Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
}
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
SV = llvm::ConstantVector::get(Indices);
@@ -4875,8 +6724,8 @@
}
return SV;
}
- case ARM::BI__builtin_neon_vuzp_v:
- case ARM::BI__builtin_neon_vuzpq_v: {
+ case NEON::BI__builtin_neon_vuzp_v:
+ case NEON::BI__builtin_neon_vuzpq_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
@@ -4894,8 +6743,8 @@
}
return SV;
}
- case ARM::BI__builtin_neon_vzip_v:
- case ARM::BI__builtin_neon_vzipq_v: {
+ case NEON::BI__builtin_neon_vzip_v:
+ case NEON::BI__builtin_neon_vzipq_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
@@ -4914,6 +6763,48 @@
}
return SV;
}
+ case NEON::BI__builtin_neon_vqtbl1q_v: {
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbl1, Ty),
+ Ops, "vtbl1");
+ }
+ case NEON::BI__builtin_neon_vqtbl2q_v: {
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbl2, Ty),
+ Ops, "vtbl2");
+ }
+ case NEON::BI__builtin_neon_vqtbl3q_v: {
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbl3, Ty),
+ Ops, "vtbl3");
+ }
+ case NEON::BI__builtin_neon_vqtbl4q_v: {
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbl4, Ty),
+ Ops, "vtbl4");
+ }
+ case NEON::BI__builtin_neon_vqtbx1q_v: {
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbx1, Ty),
+ Ops, "vtbx1");
+ }
+ case NEON::BI__builtin_neon_vqtbx2q_v: {
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbx2, Ty),
+ Ops, "vtbx2");
+ }
+ case NEON::BI__builtin_neon_vqtbx3q_v: {
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbx3, Ty),
+ Ops, "vtbx3");
+ }
+ case NEON::BI__builtin_neon_vqtbx4q_v: {
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbx4, Ty),
+ Ops, "vtbx4");
+ }
+ case NEON::BI__builtin_neon_vsqadd_v:
+ case NEON::BI__builtin_neon_vsqaddq_v: {
+ Int = Intrinsic::arm64_neon_usqadd;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
+ }
+ case NEON::BI__builtin_neon_vuqadd_v:
+ case NEON::BI__builtin_neon_vuqaddq_v: {
+ Int = Intrinsic::arm64_neon_suqadd;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
+ }
}
}
@@ -4970,6 +6861,14 @@
switch (BuiltinID) {
default: return 0;
+ case X86::BI_mm_prefetch: {
+ Value *Address = EmitScalarExpr(E->getArg(0));
+ Value *RW = ConstantInt::get(Int32Ty, 0);
+ Value *Locality = EmitScalarExpr(E->getArg(1));
+ Value *Data = ConstantInt::get(Int32Ty, 1);
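+    // llvm.prefetch takes (address, rw, locality, cache type): _mm_prefetch
+    // is always a read (RW = 0) of the data cache (Data = 1), with the
+    // locality hint taken from the builtin's second argument.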
+ Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
+ return Builder.CreateCall4(F, Address, RW, Locality, Data);
+ }
case X86::BI__builtin_ia32_vec_init_v8qi:
case X86::BI__builtin_ia32_vec_init_v4hi:
case X86::BI__builtin_ia32_vec_init_v2si:
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index 0ebf1aa..fb11751 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -17,9 +17,9 @@
#include "CodeGenModule.h"
#include "clang/AST/Decl.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/Support/CallSite.h"
#include <vector>
using namespace clang;
@@ -39,7 +39,7 @@
public:
CGNVCUDARuntime(CodeGenModule &CGM);
- void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
+ void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args) override;
};
}
diff --git a/lib/CodeGen/CGCUDARuntime.cpp b/lib/CodeGen/CGCUDARuntime.cpp
index eaf31bb..54a28f5 100644
--- a/lib/CodeGen/CGCUDARuntime.cpp
+++ b/lib/CodeGen/CGCUDARuntime.cpp
@@ -31,7 +31,8 @@
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end");
CodeGenFunction::ConditionalEvaluation eval(CGF);
- CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock);
+ CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock,
+ /*TrueCount=*/0);
eval.begin(CGF);
CGF.EmitBlock(ConfigOKBlock);
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index cfb2d62..ef29af7 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -35,7 +35,7 @@
return true;
// Producing an alias to a base class ctor/dtor can degrade debug quality
- // as the debugger cannot tell them appart.
+ // as the debugger cannot tell them apart.
if (getCodeGenOpts().OptimizationLevel == 0)
return true;
@@ -56,22 +56,20 @@
// If any field has a non-trivial destructor, we have to emit the
// destructor separately.
- for (CXXRecordDecl::field_iterator I = Class->field_begin(),
- E = Class->field_end(); I != E; ++I)
+ for (const auto *I : Class->fields())
if (I->getType().isDestructedType())
return true;
// Try to find a unique base class with a non-trivial destructor.
const CXXRecordDecl *UniqueBase = 0;
- for (CXXRecordDecl::base_class_const_iterator I = Class->bases_begin(),
- E = Class->bases_end(); I != E; ++I) {
+ for (const auto &I : Class->bases()) {
// We're in the base destructor, so skip virtual bases.
- if (I->isVirtual()) continue;
+ if (I.isVirtual()) continue;
// Skip base classes with trivial destructors.
const CXXRecordDecl *Base
- = cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl());
+ = cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
if (Base->hasTrivialDestructor()) continue;
// If we've already found a base class with a non-trivial
@@ -92,7 +90,13 @@
if (!ClassLayout.getBaseClassOffset(UniqueBase).isZero())
return true;
+ // Give up if the calling conventions don't match. We could update the call,
+ // but it is probably not worth it.
const CXXDestructorDecl *BaseD = UniqueBase->getDestructor();
+ if (BaseD->getType()->getAs<FunctionType>()->getCallConv() !=
+ D->getType()->getAs<FunctionType>()->getCallConv())
+ return true;
+
return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base),
GlobalDecl(BaseD, Dtor_Base),
false);
@@ -107,7 +111,7 @@
if (!getCodeGenOpts().CXXCtorDtorAliases)
return true;
- // The alias will use the linkage of the referrent. If we can't
+ // The alias will use the linkage of the referent. If we can't
// support aliases with that linkage, fail.
llvm::GlobalValue::LinkageTypes Linkage = getFunctionLinkage(AliasDecl);
@@ -130,7 +134,7 @@
llvm::PointerType *AliasType
= getTypes().GetFunctionType(AliasDecl)->getPointerTo();
- // Find the referrent. Some aliases might require a bitcast, in
+ // Find the referent. Some aliases might require a bitcast, in
// which case the caller is responsible for ensuring the soundness
// of these semantics.
llvm::GlobalValue *Ref = cast<llvm::GlobalValue>(GetAddrOfGlobal(TargetDecl));
@@ -143,7 +147,7 @@
if (llvm::GlobalValue::isDiscardableIfUnused(Linkage) &&
(TargetLinkage != llvm::GlobalValue::AvailableExternallyLinkage ||
!TargetDecl.getDecl()->hasAttr<AlwaysInlineAttr>())) {
- // FIXME: An extern template instanciation will create functions with
+ // FIXME: An extern template instantiation will create functions with
// linkage "AvailableExternally". In libc++, some classes also define
// members with attribute "AlwaysInline" and expect no reference to
// be generated. It is desirable to reenable this optimisation after
@@ -190,11 +194,13 @@
void CodeGenModule::EmitCXXConstructor(const CXXConstructorDecl *ctor,
CXXCtorType ctorType) {
- // The complete constructor is equivalent to the base constructor
- // for classes with no virtual bases. Try to emit it as an alias.
- if (getTarget().getCXXABI().hasConstructorVariants() &&
- !ctor->getParent()->getNumVBases() &&
- (ctorType == Ctor_Complete || ctorType == Ctor_Base)) {
+ if (!getTarget().getCXXABI().hasConstructorVariants()) {
+    // If there are no constructor variants, always emit the complete
+    // constructor.
+ ctorType = Ctor_Complete;
+ } else if (!ctor->getParent()->getNumVBases() &&
+ (ctorType == Ctor_Complete || ctorType == Ctor_Base)) {
+ // The complete constructor is equivalent to the base constructor
+ // for classes with no virtual bases. Try to emit it as an alias.
bool ProducedAlias =
!TryEmitDefinitionAsAlias(GlobalDecl(ctor, Ctor_Complete),
GlobalDecl(ctor, Ctor_Base), true);
@@ -205,8 +211,8 @@
const CGFunctionInfo &fnInfo =
getTypes().arrangeCXXConstructorDeclaration(ctor, ctorType);
- llvm::Function *fn =
- cast<llvm::Function>(GetAddrOfCXXConstructor(ctor, ctorType, &fnInfo));
+ llvm::Function *fn = cast<llvm::Function>(
+ GetAddrOfCXXConstructor(ctor, ctorType, &fnInfo, true));
setFunctionLinkage(GlobalDecl(ctor, ctorType), fn);
CodeGenFunction(*this).GenerateCode(GlobalDecl(ctor, ctorType), fn, fnInfo);
@@ -218,7 +224,8 @@
llvm::GlobalValue *
CodeGenModule::GetAddrOfCXXConstructor(const CXXConstructorDecl *ctor,
CXXCtorType ctorType,
- const CGFunctionInfo *fnInfo) {
+ const CGFunctionInfo *fnInfo,
+ bool DontDefer) {
GlobalDecl GD(ctor, ctorType);
StringRef name = getMangledName(GD);
@@ -230,7 +237,8 @@
llvm::FunctionType *fnType = getTypes().GetFunctionType(*fnInfo);
return cast<llvm::Function>(GetOrCreateLLVMFunction(name, fnType, GD,
- /*ForVTable=*/false));
+ /*ForVTable=*/false,
+ DontDefer));
}
void CodeGenModule::EmitCXXDestructor(const CXXDestructorDecl *dtor,
@@ -260,8 +268,8 @@
const CGFunctionInfo &fnInfo =
getTypes().arrangeCXXDestructor(dtor, dtorType);
- llvm::Function *fn =
- cast<llvm::Function>(GetAddrOfCXXDestructor(dtor, dtorType, &fnInfo));
+ llvm::Function *fn = cast<llvm::Function>(
+ GetAddrOfCXXDestructor(dtor, dtorType, &fnInfo, 0, true));
setFunctionLinkage(GlobalDecl(dtor, dtorType), fn);
CodeGenFunction(*this).GenerateCode(GlobalDecl(dtor, dtorType), fn, fnInfo);
@@ -274,7 +282,8 @@
CodeGenModule::GetAddrOfCXXDestructor(const CXXDestructorDecl *dtor,
CXXDtorType dtorType,
const CGFunctionInfo *fnInfo,
- llvm::FunctionType *fnType) {
+ llvm::FunctionType *fnType,
+ bool DontDefer) {
GlobalDecl GD(dtor, dtorType);
StringRef name = getMangledName(GD);
@@ -286,7 +295,8 @@
fnType = getTypes().GetFunctionType(*fnInfo);
}
return cast<llvm::Function>(GetOrCreateLLVMFunction(name, fnType, GD,
- /*ForVTable=*/false));
+ /*ForVTable=*/false,
+ DontDefer));
}
static llvm::Value *BuildAppleKextVirtualCall(CodeGenFunction &CGF,
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index 412b278..2bb3907 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -37,10 +37,9 @@
return CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType());
}
-llvm::Value *CGCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF,
- llvm::Value *&This,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT) {
+llvm::Value *CGCXXABI::EmitLoadOfMemberFunctionPointer(
+ CodeGenFunction &CGF, const Expr *E, llvm::Value *&This,
+ llvm::Value *MemPtr, const MemberPointerType *MPT) {
ErrorUnsupportedABI(CGF, "calls through member pointers");
const FunctionProtoType *FPT =
@@ -52,10 +51,10 @@
return llvm::Constant::getNullValue(FTy->getPointerTo());
}
-llvm::Value *CGCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF,
- llvm::Value *Base,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT) {
+llvm::Value *
+CGCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
+ llvm::Value *Base, llvm::Value *MemPtr,
+ const MemberPointerType *MPT) {
ErrorUnsupportedABI(CGF, "loads of member pointers");
llvm::Type *Ty = CGF.ConvertType(MPT->getPointeeType())->getPointerTo();
return llvm::Constant::getNullValue(Ty);
@@ -116,7 +115,7 @@
return true;
}
-void CGCXXABI::BuildThisParam(CodeGenFunction &CGF, FunctionArgList &params) {
+void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
// FIXME: I'm not entirely sure I like using a fake decl just for code
@@ -281,8 +280,9 @@
llvm::Function *InitFunc) {
}
-LValue CGCXXABI::EmitThreadLocalDeclRefExpr(CodeGenFunction &CGF,
- const DeclRefExpr *DRE) {
+LValue CGCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF,
+ const VarDecl *VD,
+ QualType LValType) {
ErrorUnsupportedABI(CGF, "odr-use of thread_local global");
return LValue();
}
@@ -290,3 +290,31 @@
bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) {
return false;
}
+
+/// What sort of uniqueness rules should we use for the RTTI for the
+/// given type?
+CGCXXABI::RTTIUniquenessKind
+CGCXXABI::classifyRTTIUniqueness(QualType CanTy,
+ llvm::GlobalValue::LinkageTypes Linkage) {
+ if (shouldRTTIBeUnique())
+ return RUK_Unique;
+
+ // It's only necessary for linkonce_odr or weak_odr linkage.
+ if (Linkage != llvm::GlobalValue::LinkOnceODRLinkage &&
+ Linkage != llvm::GlobalValue::WeakODRLinkage)
+ return RUK_Unique;
+
+ // It's only necessary with default visibility.
+ if (CanTy->getVisibility() != DefaultVisibility)
+ return RUK_Unique;
+
+ // If we're not required to publish this symbol, hide it.
+ if (Linkage == llvm::GlobalValue::LinkOnceODRLinkage)
+ return RUK_NonUniqueHidden;
+
+ // If we're required to publish this symbol, as we might be under an
+ // explicit instantiation, leave it with default visibility but
+ // enable string-comparisons.
+ assert(Linkage == llvm::GlobalValue::WeakODRLinkage);
+ return RUK_NonUniqueVisible;
+}
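A rough sketch of how an RTTI emitter might act on this classification; GV, CanTy and Linkage are placeholders rather than code from this patch:

    switch (CGM.getCXXABI().classifyRTTIUniqueness(CanTy, Linkage)) {
    case CGCXXABI::RUK_Unique:
      break;  // pointer equality suffices; nothing extra to do
    case CGCXXABI::RUK_NonUniqueHidden:
      // Not required to be unique: hide the symbol and rely on comparing
      // the type name strings instead of type_info addresses.
      GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
      break;
    case CGCXXABI::RUK_NonUniqueVisible:
      break;  // keep default visibility, but still compare by string
    }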
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index 9e9a2a7..beaec2c 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -41,7 +41,7 @@
class CGCXXABI {
protected:
CodeGenModule &CGM;
- OwningPtr<MangleContext> MangleCtx;
+ std::unique_ptr<MangleContext> MangleCtx;
CGCXXABI(CodeGenModule &CGM)
: CGM(CGM), MangleCtx(CGM.getContext().createMangleContext()) {}
@@ -60,15 +60,6 @@
/// Get a null value for unsupported member pointers.
llvm::Constant *GetBogusMemberPointer(QualType T);
- // FIXME: Every place that calls getVTT{Decl,Value} is something
- // that needs to be abstracted properly.
- ImplicitParamDecl *&getVTTDecl(CodeGenFunction &CGF) {
- return CGF.CXXStructorImplicitParamDecl;
- }
- llvm::Value *&getVTTValue(CodeGenFunction &CGF) {
- return CGF.CXXStructorImplicitParamValue;
- }
-
ImplicitParamDecl *&getStructorImplicitParamDecl(CodeGenFunction &CGF) {
return CGF.CXXStructorImplicitParamDecl;
}
@@ -76,11 +67,8 @@
return CGF.CXXStructorImplicitParamValue;
}
- /// Build a parameter variable suitable for 'this'.
- void BuildThisParam(CodeGenFunction &CGF, FunctionArgList &Params);
-
/// Perform prolog initialization of the parameter variable suitable
- /// for 'this' emitted by BuildThisParam.
+ /// for 'this' emitted by buildThisParam.
void EmitThisParam(CodeGenFunction &CGF);
ASTContext &getContext() const { return CGM.getContext(); }
@@ -134,17 +122,15 @@
/// Load a member function from an object and a member function
/// pointer. Apply the this-adjustment and set 'This' to the
/// adjusted value.
- virtual llvm::Value *
- EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF,
- llvm::Value *&This,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT);
+ virtual llvm::Value *EmitLoadOfMemberFunctionPointer(
+ CodeGenFunction &CGF, const Expr *E, llvm::Value *&This,
+ llvm::Value *MemPtr, const MemberPointerType *MPT);
/// Calculate an l-value from an object and a data member pointer.
- virtual llvm::Value *EmitMemberDataPointerAddress(CodeGenFunction &CGF,
- llvm::Value *Base,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT);
+ virtual llvm::Value *
+ EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
+ llvm::Value *Base, llvm::Value *MemPtr,
+ const MemberPointerType *MPT);
/// Perform a derived-to-base, base-to-derived, or bitcast member
/// pointer conversion.
@@ -272,23 +258,27 @@
}
/// Perform ABI-specific "this" argument adjustment required prior to
- /// a virtual function call.
- virtual llvm::Value *adjustThisArgumentForVirtualCall(CodeGenFunction &CGF,
- GlobalDecl GD,
- llvm::Value *This) {
+ /// a call of a virtual function.
+ /// The "VirtualCall" argument is true iff the call itself is virtual.
+ virtual llvm::Value *
+ adjustThisArgumentForVirtualFunctionCall(CodeGenFunction &CGF, GlobalDecl GD,
+ llvm::Value *This,
+ bool VirtualCall) {
return This;
}
- /// Build the ABI-specific portion of the parameter list for a
- /// function. This generally involves a 'this' parameter and
- /// possibly some extra data for constructors and destructors.
+ /// Build a parameter variable suitable for 'this'.
+ void buildThisParam(CodeGenFunction &CGF, FunctionArgList &Params);
+
+ /// Insert any ABI-specific implicit parameters into the parameter list for a
+ /// function. This generally involves extra data for constructors and
+ /// destructors.
///
/// ABIs may also choose to override the return type, which has been
/// initialized with the type of 'this' if HasThisReturn(CGF.CurGD) is true or
/// the formal return type of the function otherwise.
- virtual void BuildInstanceFunctionParams(CodeGenFunction &CGF,
- QualType &ResTy,
- FunctionArgList &Params) = 0;
+ virtual void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy,
+ FunctionArgList &Params) = 0;
/// Perform ABI-specific "this" parameter adjustment in a virtual function
/// prologue.
@@ -300,14 +290,20 @@
/// Emit the ABI-specific prolog for the function.
virtual void EmitInstanceFunctionProlog(CodeGenFunction &CGF) = 0;
- /// Emit the constructor call. Return the function that is called.
- virtual void EmitConstructorCall(CodeGenFunction &CGF,
- const CXXConstructorDecl *D,
- CXXCtorType Type,
- bool ForVirtualBase, bool Delegating,
- llvm::Value *This,
- CallExpr::const_arg_iterator ArgBeg,
- CallExpr::const_arg_iterator ArgEnd) = 0;
+ /// Add any ABI-specific implicit arguments needed to call a constructor.
+ ///
+ /// \return The number of args added to the call, which is typically zero or
+ /// one.
+ virtual unsigned
+ addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
+ CXXCtorType Type, bool ForVirtualBase,
+ bool Delegating, CallArgList &Args) = 0;
+
+ /// Emit the destructor call.
+ virtual void EmitDestructorCall(CodeGenFunction &CGF,
+ const CXXDestructorDecl *DD, CXXDtorType Type,
+ bool ForVirtualBase, bool Delegating,
+ llvm::Value *This) = 0;
/// Emits the VTable definitions required for the given record type.
virtual void emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -485,8 +481,39 @@
/// Emit a reference to a non-local thread_local variable (including
/// triggering the initialization of all thread_local variables in its
/// translation unit).
- virtual LValue EmitThreadLocalDeclRefExpr(CodeGenFunction &CGF,
- const DeclRefExpr *DRE);
+ virtual LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF,
+ const VarDecl *VD,
+ QualType LValType);
+
+ /**************************** RTTI Uniqueness ******************************/
+
+protected:
+ /// Returns true if the ABI requires RTTI type_info objects to be unique
+ /// across a program.
+ virtual bool shouldRTTIBeUnique() { return true; }
+
+public:
+ /// What sort of unique-RTTI behavior should we use?
+ enum RTTIUniquenessKind {
+ /// We are guaranteeing, or need to guarantee, that the RTTI string
+ /// is unique.
+ RUK_Unique,
+
+ /// We are not guaranteeing uniqueness for the RTTI string, so we
+ /// can demote to hidden visibility but must use string comparisons.
+ RUK_NonUniqueHidden,
+
+ /// We are not guaranteeing uniqueness for the RTTI string, so we
+ /// have to use string comparisons, but we also have to emit it with
+ /// non-hidden visibility.
+ RUK_NonUniqueVisible
+ };
+
+ /// Return the RTTI uniqueness classification (and hence the required
+ /// visibility) for the given type and linkage in the current ABI.
+ RTTIUniquenessKind
+ classifyRTTIUniqueness(QualType CanTy,
+ llvm::GlobalValue::LinkageTypes Linkage);
};
// Create an instance of a C++ ABI class:
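To make the new constructor hook concrete, a hypothetical ABI subclass could implement it roughly as below; MyCXXABI and NeedsExtraFlag are invented for illustration and do not appear in this patch:

    unsigned MyCXXABI::addImplicitConstructorArgs(
        CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
        bool ForVirtualBase, bool Delegating, CallArgList &Args) {
      if (!NeedsExtraFlag(D, Type))  // hypothetical predicate
        return 0;                    // nothing appended to the call
      llvm::Value *Flag = CGF.Builder.getInt32(Delegating ? 0 : 1);
      Args.add(RValue::get(Flag), CGF.getContext().IntTy);
      return 1;                      // caller counts one extra argument
    }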
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 22f2467..e26d6b2d 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -26,10 +26,10 @@
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace clang;
using namespace CodeGen;
@@ -79,23 +79,26 @@
CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
// When translating an unprototyped function type, always use a
// variadic type.
- return arrangeLLVMFunctionInfo(FTNP->getResultType().getUnqualifiedType(),
- None, FTNP->getExtInfo(), RequiredArgs(0));
+ return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(),
+ false, None, FTNP->getExtInfo(),
+ RequiredArgs(0));
}
/// Arrange the LLVM function layout for a value of the given function
/// type, on top of any implicit parameters already stored. Use the
/// given ExtInfo instead of the ExtInfo from the function type.
static const CGFunctionInfo &arrangeLLVMFunctionInfo(CodeGenTypes &CGT,
+ bool IsInstanceMethod,
SmallVectorImpl<CanQualType> &prefix,
CanQual<FunctionProtoType> FTP,
FunctionType::ExtInfo extInfo) {
RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size());
// FIXME: Kill copy.
- for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i)
- prefix.push_back(FTP->getArgType(i));
- CanQualType resultType = FTP->getResultType().getUnqualifiedType();
- return CGT.arrangeLLVMFunctionInfo(resultType, prefix, extInfo, required);
+ for (unsigned i = 0, e = FTP->getNumParams(); i != e; ++i)
+ prefix.push_back(FTP->getParamType(i));
+ CanQualType resultType = FTP->getReturnType().getUnqualifiedType();
+ return CGT.arrangeLLVMFunctionInfo(resultType, IsInstanceMethod, prefix,
+ extInfo, required);
}
/// Arrange the argument and result information for a free function (i.e.
@@ -103,7 +106,7 @@
static const CGFunctionInfo &arrangeFreeFunctionType(CodeGenTypes &CGT,
SmallVectorImpl<CanQualType> &prefix,
CanQual<FunctionProtoType> FTP) {
- return arrangeLLVMFunctionInfo(CGT, prefix, FTP, FTP->getExtInfo());
+ return arrangeLLVMFunctionInfo(CGT, false, prefix, FTP, FTP->getExtInfo());
}
/// Arrange the argument and result information for a free function (i.e.
@@ -112,7 +115,7 @@
SmallVectorImpl<CanQualType> &prefix,
CanQual<FunctionProtoType> FTP) {
FunctionType::ExtInfo extInfo = FTP->getExtInfo();
- return arrangeLLVMFunctionInfo(CGT, prefix, FTP, extInfo);
+ return arrangeLLVMFunctionInfo(CGT, true, prefix, FTP, extInfo);
}
/// Arrange the argument and result information for a value of the
@@ -123,7 +126,7 @@
return ::arrangeFreeFunctionType(*this, argTypes, FTP);
}
-static CallingConv getCallingConventionForDecl(const Decl *D) {
+static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
// Set the appropriate calling convention for the Function.
if (D->hasAttr<StdCallAttr>())
return CC_X86StdCall;
@@ -146,6 +149,12 @@
if (D->hasAttr<IntelOclBiccAttr>())
return CC_IntelOclBicc;
+ if (D->hasAttr<MSABIAttr>())
+ return IsWindows ? CC_C : CC_X86_64Win64;
+
+ if (D->hasAttr<SysVABIAttr>())
+ return IsWindows ? CC_X86_64SysV : CC_C;
+
return CC_C;
}
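At the source level the two new attributes simply select the "other" x86-64 convention relative to the target's default, for example (illustrative, not taken from the patch's tests):

    // On x86-64, ms_abi lowers to CC_X86_64Win64 when targeting a SysV
    // platform and to plain CC_C on Windows; sysv_abi is the mirror image.
    void __attribute__((ms_abi))   win64_style_callee(void);
    void __attribute__((sysv_abi)) sysv_style_callee(void);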
@@ -202,18 +211,41 @@
CanQualType resultType =
TheCXXABI.HasThisReturn(GD) ? argTypes.front() : Context.VoidTy;
- TheCXXABI.BuildConstructorSignature(D, ctorKind, resultType, argTypes);
-
CanQual<FunctionProtoType> FTP = GetFormalType(D);
- RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, argTypes.size());
-
// Add the formal parameters.
- for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i)
- argTypes.push_back(FTP->getArgType(i));
+ for (unsigned i = 0, e = FTP->getNumParams(); i != e; ++i)
+ argTypes.push_back(FTP->getParamType(i));
+
+ TheCXXABI.BuildConstructorSignature(D, ctorKind, resultType, argTypes);
+
+ RequiredArgs required =
+ (D->isVariadic() ? RequiredArgs(argTypes.size()) : RequiredArgs::All);
FunctionType::ExtInfo extInfo = FTP->getExtInfo();
- return arrangeLLVMFunctionInfo(resultType, argTypes, extInfo, required);
+ return arrangeLLVMFunctionInfo(resultType, true, argTypes, extInfo, required);
+}
+
+/// Arrange a call to a C++ constructor, passing the given arguments.
+const CGFunctionInfo &
+CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
+ const CXXConstructorDecl *D,
+ CXXCtorType CtorKind,
+ unsigned ExtraArgs) {
+ // FIXME: Kill copy.
+ SmallVector<CanQualType, 16> ArgTypes;
+ for (CallArgList::const_iterator i = args.begin(), e = args.end(); i != e;
+ ++i)
+ ArgTypes.push_back(Context.getCanonicalParamType(i->Ty));
+
+ CanQual<FunctionProtoType> FPT = GetFormalType(D);
+ RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs);
+ GlobalDecl GD(D, CtorKind);
+ CanQualType ResultType =
+ TheCXXABI.HasThisReturn(GD) ? ArgTypes.front() : Context.VoidTy;
+
+ FunctionType::ExtInfo Info = FPT->getExtInfo();
+ return arrangeLLVMFunctionInfo(ResultType, true, ArgTypes, Info, Required);
}
/// Arrange the argument and result information for a declaration,
@@ -232,11 +264,11 @@
TheCXXABI.BuildDestructorSignature(D, dtorKind, resultType, argTypes);
CanQual<FunctionProtoType> FTP = GetFormalType(D);
- assert(FTP->getNumArgs() == 0 && "dtor with formal parameters");
+ assert(FTP->getNumParams() == 0 && "dtor with formal parameters");
assert(FTP->isVariadic() == 0 && "dtor with formal parameters");
FunctionType::ExtInfo extInfo = FTP->getExtInfo();
- return arrangeLLVMFunctionInfo(resultType, argTypes, extInfo,
+ return arrangeLLVMFunctionInfo(resultType, true, argTypes, extInfo,
RequiredArgs::All);
}
@@ -256,7 +288,7 @@
// non-variadic type.
if (isa<FunctionNoProtoType>(FTy)) {
CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>();
- return arrangeLLVMFunctionInfo(noProto->getResultType(), None,
+ return arrangeLLVMFunctionInfo(noProto->getReturnType(), false, None,
noProto->getExtInfo(), RequiredArgs::All);
}
@@ -286,13 +318,13 @@
argTys.push_back(Context.getCanonicalParamType(receiverType));
argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType()));
// FIXME: Kill copy?
- for (ObjCMethodDecl::param_const_iterator i = MD->param_begin(),
- e = MD->param_end(); i != e; ++i) {
- argTys.push_back(Context.getCanonicalParamType((*i)->getType()));
+ for (const auto *I : MD->params()) {
+ argTys.push_back(Context.getCanonicalParamType(I->getType()));
}
FunctionType::ExtInfo einfo;
- einfo = einfo.withCallingConv(getCallingConventionForDecl(MD));
+ bool IsWindows = getContext().getTargetInfo().getTriple().isOSWindows();
+ einfo = einfo.withCallingConv(getCallingConventionForDecl(MD, IsWindows));
if (getContext().getLangOpts().ObjCAutoRefCount &&
MD->hasAttr<NSReturnsRetainedAttr>())
@@ -301,8 +333,8 @@
RequiredArgs required =
(MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All);
- return arrangeLLVMFunctionInfo(GetReturnType(MD->getResultType()), argTys,
- einfo, required);
+ return arrangeLLVMFunctionInfo(GetReturnType(MD->getReturnType()), false,
+ argTys, einfo, required);
}
const CGFunctionInfo &
@@ -336,7 +368,7 @@
// extra prefix plus the arguments in the prototype.
if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) {
if (proto->isVariadic())
- required = RequiredArgs(proto->getNumArgs() + numExtraRequiredArgs);
+ required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs);
// If we don't have a prototype at all, but we're supposed to
// explicitly use the variadic convention for unprototyped calls,
@@ -348,7 +380,7 @@
required = RequiredArgs(args.size());
}
- return CGT.arrangeFreeFunctionCall(fnType->getResultType(), args,
+ return CGT.arrangeFreeFunctionCall(fnType->getReturnType(), args,
fnType->getExtInfo(), required);
}
@@ -380,8 +412,8 @@
for (CallArgList::const_iterator i = args.begin(), e = args.end();
i != e; ++i)
argTypes.push_back(Context.getCanonicalParamType(i->Ty));
- return arrangeLLVMFunctionInfo(GetReturnType(resultType), argTypes, info,
- required);
+ return arrangeLLVMFunctionInfo(GetReturnType(resultType), false, argTypes,
+ info, required);
}
/// Arrange a call to a C++ method, passing the given arguments.
@@ -396,15 +428,13 @@
argTypes.push_back(Context.getCanonicalParamType(i->Ty));
FunctionType::ExtInfo info = FPT->getExtInfo();
- return arrangeLLVMFunctionInfo(GetReturnType(FPT->getResultType()),
+ return arrangeLLVMFunctionInfo(GetReturnType(FPT->getReturnType()), true,
argTypes, info, required);
}
-const CGFunctionInfo &
-CodeGenTypes::arrangeFunctionDeclaration(QualType resultType,
- const FunctionArgList &args,
- const FunctionType::ExtInfo &info,
- bool isVariadic) {
+const CGFunctionInfo &CodeGenTypes::arrangeFreeFunctionDeclaration(
+ QualType resultType, const FunctionArgList &args,
+ const FunctionType::ExtInfo &info, bool isVariadic) {
// FIXME: Kill copy.
SmallVector<CanQualType, 16> argTypes;
for (FunctionArgList::const_iterator i = args.begin(), e = args.end();
@@ -413,12 +443,12 @@
RequiredArgs required =
(isVariadic ? RequiredArgs(args.size()) : RequiredArgs::All);
- return arrangeLLVMFunctionInfo(GetReturnType(resultType), argTypes, info,
+ return arrangeLLVMFunctionInfo(GetReturnType(resultType), false, argTypes, info,
required);
}
const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
- return arrangeLLVMFunctionInfo(getContext().VoidTy, None,
+ return arrangeLLVMFunctionInfo(getContext().VoidTy, false, None,
FunctionType::ExtInfo(), RequiredArgs::All);
}
@@ -427,6 +457,7 @@
/// above functions ultimately defer to.
const CGFunctionInfo &
CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
+ bool IsInstanceMethod,
ArrayRef<CanQualType> argTypes,
FunctionType::ExtInfo info,
RequiredArgs required) {
@@ -440,7 +471,8 @@
// Lookup or create unique function info.
llvm::FoldingSetNodeID ID;
- CGFunctionInfo::Profile(ID, info, required, resultType, argTypes);
+ CGFunctionInfo::Profile(ID, IsInstanceMethod, info, required, resultType,
+ argTypes);
void *insertPos = 0;
CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos);
@@ -448,7 +480,8 @@
return *FI;
// Construct the function info. We co-allocate the ArgInfos.
- FI = CGFunctionInfo::create(CC, info, resultType, argTypes, required);
+ FI = CGFunctionInfo::create(CC, IsInstanceMethod, info, resultType, argTypes,
+ required);
FunctionInfos.InsertNode(FI, insertPos);
bool inserted = FunctionsBeingProcessed.insert(FI); (void)inserted;
@@ -464,10 +497,9 @@
if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == 0)
retInfo.setCoerceToType(ConvertType(FI->getReturnType()));
- for (CGFunctionInfo::arg_iterator I = FI->arg_begin(), E = FI->arg_end();
- I != E; ++I)
- if (I->info.canHaveCoerceToType() && I->info.getCoerceToType() == 0)
- I->info.setCoerceToType(ConvertType(I->type));
+ for (auto &I : FI->arguments())
+ if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == 0)
+ I.info.setCoerceToType(ConvertType(I.type));
bool erased = FunctionsBeingProcessed.erase(FI); (void)erased;
assert(erased && "Not in set?");
@@ -476,6 +508,7 @@
}
CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
+ bool IsInstanceMethod,
const FunctionType::ExtInfo &info,
CanQualType resultType,
ArrayRef<CanQualType> argTypes,
@@ -486,11 +519,13 @@
FI->CallingConvention = llvmCC;
FI->EffectiveCallingConvention = llvmCC;
FI->ASTCallingConvention = info.getCC();
+ FI->InstanceMethod = IsInstanceMethod;
FI->NoReturn = info.getNoReturn();
FI->ReturnsRetained = info.getProducesResult();
FI->Required = required;
FI->HasRegParm = info.getHasRegParm();
FI->RegParm = info.getRegParm();
+ FI->ArgStruct = 0;
FI->NumArgs = argTypes.size();
FI->getArgsBuffer()[0].type = resultType;
for (unsigned i = 0, e = argTypes.size(); i != e; ++i)
@@ -516,9 +551,7 @@
const FieldDecl *LargestFD = 0;
CharUnits UnionSize = CharUnits::Zero();
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
- const FieldDecl *FD = *i;
+ for (const auto *FD : RD->fields()) {
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
CharUnits FieldSize = getContext().getTypeSizeInChars(FD->getType());
@@ -530,11 +563,10 @@
if (LargestFD)
GetExpandedTypes(LargestFD->getType(), expandedTypes);
} else {
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
- assert(!i->isBitField() &&
+ for (const auto *I : RD->fields()) {
+ assert(!I->isBitField() &&
"Cannot expand structure with bit-field members.");
- GetExpandedTypes(i->getType(), expandedTypes);
+ GetExpandedTypes(I->getType(), expandedTypes);
}
}
} else if (const ComplexType *CT = type->getAs<ComplexType>()) {
@@ -567,9 +599,7 @@
const FieldDecl *LargestFD = 0;
CharUnits UnionSize = CharUnits::Zero();
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
- const FieldDecl *FD = *i;
+ for (const auto *FD : RD->fields()) {
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
CharUnits FieldSize = getContext().getTypeSizeInChars(FD->getType());
@@ -584,9 +614,7 @@
AI = ExpandTypeFromArgs(LargestFD->getType(), SubLV, AI);
}
} else {
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
- FieldDecl *FD = *i;
+ for (const auto *FD : RD->fields()) {
QualType FT = FD->getType();
// FIXME: What are the right qualifiers here?
@@ -850,6 +878,11 @@
return FI.getReturnInfo().isIndirect();
}
+bool CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) {
+ return ReturnTypeUsesSRet(FI) &&
+ getTargetCodeGenInfo().doesReturnSlotInterfereWithArgs();
+}
+
bool CodeGenModule::ReturnTypeUsesFPRet(QualType ResultType) {
if (const BuiltinType *BT = ResultType->getAs<BuiltinType>()) {
switch (BT->getKind()) {
@@ -902,6 +935,18 @@
resultType = retAI.getCoerceToType();
break;
+ case ABIArgInfo::InAlloca:
+ if (retAI.getInAllocaSRet()) {
+ // sret things on win32 aren't void; they return the sret pointer.
+ QualType ret = FI.getReturnType();
+ llvm::Type *ty = ConvertType(ret);
+ unsigned addressSpace = Context.getTargetAddressSpace(ret);
+ resultType = llvm::PointerType::get(ty, addressSpace);
+ } else {
+ resultType = llvm::Type::getVoidTy(getLLVMContext());
+ }
+ break;
+
case ABIArgInfo::Indirect: {
assert(!retAI.getIndirectAlign() && "Align unused on indirect return.");
resultType = llvm::Type::getVoidTy(getLLVMContext());
@@ -934,6 +979,7 @@
switch (argAI.getKind()) {
case ABIArgInfo::Ignore:
+ case ABIArgInfo::InAlloca:
break;
case ABIArgInfo::Indirect: {
@@ -964,6 +1010,10 @@
}
}
+ // Add the inalloca struct as the last parameter type.
+ if (llvm::StructType *ArgStruct = FI.getArgStruct())
+ argTypes.push_back(ArgStruct->getPointerTo());
+
bool Erased = FunctionsBeingProcessed.erase(&FI); (void)Erased;
assert(Erased && "Not in set?");
@@ -1006,6 +1056,8 @@
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
if (TargetDecl->hasAttr<NoReturnAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
+ if (TargetDecl->hasAttr<NoDuplicateAttr>())
+ FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
const FunctionProtoType *FPT = Fn->getType()->getAs<FunctionProtoType>();
@@ -1089,6 +1141,13 @@
case ABIArgInfo::Ignore:
break;
+ case ABIArgInfo::InAlloca: {
+ // inalloca disables readnone and readonly
+ FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
+ .removeAttribute(llvm::Attribute::ReadNone);
+ break;
+ }
+
case ABIArgInfo::Indirect: {
llvm::AttrBuilder SRETAttrs;
SRETAttrs.addAttribute(llvm::Attribute::StructRet);
@@ -1114,10 +1173,9 @@
llvm::AttributeSet::ReturnIndex,
RetAttrs));
- for (CGFunctionInfo::const_arg_iterator it = FI.arg_begin(),
- ie = FI.arg_end(); it != ie; ++it) {
- QualType ParamType = it->type;
- const ABIArgInfo &AI = it->info;
+ for (const auto &I : FI.arguments()) {
+ QualType ParamType = I.type;
+ const ABIArgInfo &AI = I.info;
llvm::AttrBuilder Attrs;
if (AI.getPaddingType()) {
@@ -1173,6 +1231,13 @@
// Skip increment, no matching LLVM parameter.
continue;
+ case ABIArgInfo::InAlloca:
+ // inalloca disables readnone and readonly.
+ FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
+ .removeAttribute(llvm::Attribute::ReadNone);
+ // Skip increment, no matching LLVM parameter.
+ continue;
+
case ABIArgInfo::Expand: {
SmallVector<llvm::Type*, 8> types;
// FIXME: This is rather inefficient. Do we ever actually need to do
@@ -1188,6 +1253,14 @@
PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index, Attrs));
++Index;
}
+
+ // Add the inalloca attribute to the trailing inalloca parameter if present.
+ if (FI.usesInAlloca()) {
+ llvm::AttrBuilder Attrs;
+ Attrs.addAttribute(llvm::Attribute::InAlloca);
+ PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index, Attrs));
+ }
+
if (FuncAttrs.hasAttributes())
PAL.push_back(llvm::
AttributeSet::get(getLLVMContext(),
@@ -1224,7 +1297,7 @@
// return statements.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl)) {
if (FD->hasImplicitReturnZero()) {
- QualType RetTy = FD->getResultType().getUnqualifiedType();
+ QualType RetTy = FD->getReturnType().getUnqualifiedType();
llvm::Type* LLVMTy = CGM.getTypes().ConvertType(RetTy);
llvm::Constant* Zero = llvm::Constant::getNullValue(LLVMTy);
Builder.CreateStore(Zero, ReturnValue);
@@ -1237,6 +1310,16 @@
// Emit allocs for param decls. Give the LLVM Argument nodes names.
llvm::Function::arg_iterator AI = Fn->arg_begin();
+ // If we're using inalloca, all the memory arguments are GEPs off of the last
+ // parameter, which is a pointer to the complete memory area.
+ llvm::Value *ArgStruct = 0;
+ if (FI.usesInAlloca()) {
+ llvm::Function::arg_iterator EI = Fn->arg_end();
+ --EI;
+ ArgStruct = EI;
+ assert(ArgStruct->getType() == FI.getArgStruct()->getPointerTo());
+ }
+
// Name the struct return argument.
if (CGM.ReturnTypeUsesSRet(FI)) {
AI->setName("agg.result");
@@ -1246,6 +1329,18 @@
++AI;
}
+ // Track if we received the parameter as a pointer (indirect, byval, or
+ // inalloca). If we already have a pointer, EmitParmDecl doesn't need to copy it
+ // into a local alloca for us.
+ enum ValOrPointer { HaveValue = 0, HavePointer = 1 };
+ typedef llvm::PointerIntPair<llvm::Value *, 1> ValueAndIsPtr;
+ SmallVector<ValueAndIsPtr, 16> ArgVals;
+ ArgVals.reserve(Args.size());
+
+ // Create a pointer value for every parameter declaration. This usually
+ // entails copying one or more LLVM IR arguments into an alloca. Don't push
+ // any cleanups or do anything that might unwind. We do that separately, so
+ // we can push the cleanups in the correct order for the ABI.
assert(FI.arg_size() == Args.size() &&
"Mismatch between function signature & arguments.");
unsigned ArgNo = 1;
@@ -1264,6 +1359,13 @@
++AI;
switch (ArgI.getKind()) {
+ case ABIArgInfo::InAlloca: {
+ llvm::Value *V = Builder.CreateStructGEP(
+ ArgStruct, ArgI.getInAllocaFieldIndex(), Arg->getName());
+ ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
+ continue; // Don't increment AI!
+ }
+
case ABIArgInfo::Indirect: {
llvm::Value *V = AI;
@@ -1290,6 +1392,7 @@
false);
V = AlignedTemp;
}
+ ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
} else {
// Load scalar value from indirect argument.
CharUnits Alignment = getContext().getTypeAlignInChars(Ty);
@@ -1298,8 +1401,8 @@
if (isPromoted)
V = emitArgumentDemotion(*this, Arg, V);
+ ArgVals.push_back(ValueAndIsPtr(V, HaveValue));
}
- EmitParmDecl(*Arg, V, ArgNo);
break;
}
@@ -1340,7 +1443,7 @@
if (V->getType() != LTy)
V = Builder.CreateBitCast(V, LTy);
- EmitParmDecl(*Arg, V, ArgNo);
+ ArgVals.push_back(ValueAndIsPtr(V, HaveValue));
break;
}
@@ -1412,8 +1515,10 @@
V = EmitLoadOfScalar(V, false, AlignmentToUse, Ty, Arg->getLocStart());
if (isPromoted)
V = emitArgumentDemotion(*this, Arg, V);
+ ArgVals.push_back(ValueAndIsPtr(V, HaveValue));
+ } else {
+ ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
}
- EmitParmDecl(*Arg, V, ArgNo);
continue; // Skip ++AI increment, already done.
}
@@ -1426,7 +1531,7 @@
Alloca->setAlignment(Align.getQuantity());
LValue LV = MakeAddrLValue(Alloca, Ty, Align);
llvm::Function::arg_iterator End = ExpandTypeFromArgs(Ty, LV, AI);
- EmitParmDecl(*Arg, Alloca, ArgNo);
+ ArgVals.push_back(ValueAndIsPtr(Alloca, HavePointer));
// Name the arguments used in expansion and increment AI.
unsigned Index = 0;
@@ -1437,11 +1542,12 @@
case ABIArgInfo::Ignore:
// Initialize the local variable appropriately.
- if (!hasScalarEvaluationKind(Ty))
- EmitParmDecl(*Arg, CreateMemTemp(Ty), ArgNo);
- else
- EmitParmDecl(*Arg, llvm::UndefValue::get(ConvertType(Arg->getType())),
- ArgNo);
+ if (!hasScalarEvaluationKind(Ty)) {
+ ArgVals.push_back(ValueAndIsPtr(CreateMemTemp(Ty), HavePointer));
+ } else {
+ llvm::Value *U = llvm::UndefValue::get(ConvertType(Arg->getType()));
+ ArgVals.push_back(ValueAndIsPtr(U, HaveValue));
+ }
// Skip increment, no matching LLVM parameter.
continue;
@@ -1449,7 +1555,20 @@
++AI;
}
+
+ if (FI.usesInAlloca())
+ ++AI;
assert(AI == Fn->arg_end() && "Argument mismatch!");
+
+ if (getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+ for (int I = Args.size() - 1; I >= 0; --I)
+ EmitParmDecl(*Args[I], ArgVals[I].getPointer(), ArgVals[I].getInt(),
+ I + 1);
+ } else {
+ for (unsigned I = 0, E = Args.size(); I != E; ++I)
+ EmitParmDecl(*Args[I], ArgVals[I].getPointer(), ArgVals[I].getInt(),
+ I + 1);
+ }
}
static void eraseUnusedBitCasts(llvm::Instruction *insn) {
@@ -1626,7 +1745,7 @@
}
llvm::StoreInst *store =
- dyn_cast<llvm::StoreInst>(CGF.ReturnValue->use_back());
+ dyn_cast<llvm::StoreInst>(CGF.ReturnValue->user_back());
if (!store) return 0;
// These aren't actually possible for non-coerced returns, and we
@@ -1662,6 +1781,20 @@
const ABIArgInfo &RetAI = FI.getReturnInfo();
switch (RetAI.getKind()) {
+ case ABIArgInfo::InAlloca:
+ // Aggregates get evaluated directly into the destination. Sometimes we
+ // need to return the sret value in a register, though.
+ assert(hasAggregateEvaluationKind(RetTy));
+ if (RetAI.getInAllocaSRet()) {
+ llvm::Function::arg_iterator EI = CurFn->arg_end();
+ --EI;
+ llvm::Value *ArgStruct = EI;
+ llvm::Value *SRet =
+ Builder.CreateStructGEP(ArgStruct, RetAI.getInAllocaFieldIndex());
+ RV = Builder.CreateLoad(SRet, "sret");
+ }
+ break;
+
case ABIArgInfo::Indirect: {
switch (getEvaluationKind(RetTy)) {
case TEK_Complex: {
@@ -1750,6 +1883,25 @@
Ret->setDebugLoc(RetDbgLoc);
}
+static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) {
+ const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
+ return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory;
+}
+
+static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF, QualType Ty) {
+ // FIXME: Generate IR in one pass, rather than going back and fixing up these
+ // placeholders.
+ llvm::Type *IRTy = CGF.ConvertTypeForMem(Ty);
+ llvm::Value *Placeholder =
+ llvm::UndefValue::get(IRTy->getPointerTo()->getPointerTo());
+ Placeholder = CGF.Builder.CreateLoad(Placeholder);
+ return AggValueSlot::forAddr(Placeholder, CharUnits::Zero(),
+ Ty.getQualifiers(),
+ AggValueSlot::IsNotDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased);
+}
+
void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
const VarDecl *param,
SourceLocation loc) {
@@ -1773,6 +1925,20 @@
return args.add(RValue::get(Builder.CreateLoad(local)), type);
}
+ if (isInAllocaArgument(CGM.getCXXABI(), type)) {
+ AggValueSlot Slot = createPlaceholderSlot(*this, type);
+ Slot.setExternallyDestructed();
+
+ // FIXME: Either emit a copy constructor call, or figure out how to do
+ // guaranteed tail calls with perfect forwarding in LLVM.
+ CGM.ErrorUnsupported(param, "non-trivial argument copy for thunk");
+ EmitNullInitialization(Slot.getAddr(), type);
+
+ RValue RV = Slot.asRValue();
+ args.add(RV, type);
+ return;
+ }
+
args.add(convertTempToRValue(local, type, loc), type);
}
@@ -1852,14 +2018,13 @@
static void emitWritebacks(CodeGenFunction &CGF,
const CallArgList &args) {
- for (CallArgList::writeback_iterator
- i = args.writeback_begin(), e = args.writeback_end(); i != e; ++i)
- emitWriteback(CGF, *i);
+ for (const auto &I : args.writebacks())
+ emitWriteback(CGF, I);
}
static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
const CallArgList &CallArgs) {
- assert(CGF.getTarget().getCXXABI().isArgumentDestroyedByCallee());
+ assert(CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee());
ArrayRef<CallArgList::CallArgCleanup> Cleanups =
CallArgs.getCleanupsToDeactivate();
// Iterate in reverse to increase the likelihood of popping the cleanup.
@@ -2004,6 +2169,99 @@
args.add(RValue::get(finalArgument), CRE->getType());
}
+void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) {
+ assert(!StackBase && !StackCleanup.isValid());
+
+ // Save the stack.
+ llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stacksave);
+ StackBase = CGF.Builder.CreateCall(F, "inalloca.save");
+
+ // Control gets really tied up in landing pads, so we have to spill the
+ // stacksave to an alloca to avoid violating SSA form.
+ // TODO: This is dead if we never emit the cleanup. We should create the
+ // alloca and store lazily on the first cleanup emission.
+ StackBaseMem = CGF.CreateTempAlloca(CGF.Int8PtrTy, "inalloca.spmem");
+ CGF.Builder.CreateStore(StackBase, StackBaseMem);
+ CGF.pushStackRestore(EHCleanup, StackBaseMem);
+ StackCleanup = CGF.EHStack.getInnermostEHScope();
+ assert(StackCleanup.isValid());
+}
+
+void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const {
+ if (StackBase) {
+ CGF.DeactivateCleanupBlock(StackCleanup, StackBase);
+ llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore);
+ // We could load StackBase from StackBaseMem, but in the non-exceptional
+ // case we can skip it.
+ CGF.Builder.CreateCall(F, StackBase);
+ }
+}
+
+void CodeGenFunction::EmitCallArgs(CallArgList &Args,
+ ArrayRef<QualType> ArgTypes,
+ CallExpr::const_arg_iterator ArgBeg,
+ CallExpr::const_arg_iterator ArgEnd,
+ bool ForceColumnInfo) {
+ CGDebugInfo *DI = getDebugInfo();
+ SourceLocation CallLoc;
+ if (DI) CallLoc = DI->getLocation();
+
+ // We *have* to evaluate arguments from right to left in the MS C++ ABI,
+ // because arguments are destroyed left to right in the callee.
+ if (CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+ // Insert a stack save if we're going to need any inalloca args.
+ bool HasInAllocaArgs = false;
+ for (ArrayRef<QualType>::iterator I = ArgTypes.begin(), E = ArgTypes.end();
+ I != E && !HasInAllocaArgs; ++I)
+ HasInAllocaArgs = isInAllocaArgument(CGM.getCXXABI(), *I);
+ if (HasInAllocaArgs) {
+ assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
+ Args.allocateArgumentMemory(*this);
+ }
+
+ // Evaluate each argument.
+ size_t CallArgsStart = Args.size();
+ for (int I = ArgTypes.size() - 1; I >= 0; --I) {
+ CallExpr::const_arg_iterator Arg = ArgBeg + I;
+ EmitCallArg(Args, *Arg, ArgTypes[I]);
+ // Restore the debug location.
+ if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo);
+ }
+
+ // Un-reverse the arguments we just evaluated so they match up with the LLVM
+ // IR function.
+ std::reverse(Args.begin() + CallArgsStart, Args.end());
+ return;
+ }
+
+ for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) {
+ CallExpr::const_arg_iterator Arg = ArgBeg + I;
+ assert(Arg != ArgEnd);
+ EmitCallArg(Args, *Arg, ArgTypes[I]);
+ // Restore the debug location.
+ if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo);
+ }
+}
+
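The ordering constraint handled above can be seen with a small example (illustrative source, not from the patch):

    struct A { A(int); ~A(); };
    void callee(A a, A b, A c);
    void caller() { callee(A(1), A(2), A(3)); }
    // With callee-destroyed arguments, the callee destroys a, b, c in that
    // left-to-right order, so the caller evaluates A(3) first and A(1) last;
    // the std::reverse above then restores source order for the IR call.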
+namespace {
+
+struct DestroyUnpassedArg : EHScopeStack::Cleanup {
+ DestroyUnpassedArg(llvm::Value *Addr, QualType Ty)
+ : Addr(Addr), Ty(Ty) {}
+
+ llvm::Value *Addr;
+ QualType Ty;
+
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
+ assert(!Dtor->isTrivial());
+ CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
+ /*Delegating=*/false, Addr);
+ }
+};
+
+}
+
void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
QualType type) {
if (const ObjCIndirectCopyRestoreExpr *CRE
@@ -2026,23 +2284,25 @@
// In the Microsoft C++ ABI, aggregate arguments are destructed by the callee.
// However, we still have to push an EH-only cleanup in case we unwind before
// we make it to the call.
- if (HasAggregateEvalKind &&
- CGM.getTarget().getCXXABI().isArgumentDestroyedByCallee()) {
- const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
- if (RD && RD->hasNonTrivialDestructor()) {
- AggValueSlot Slot = CreateAggTemp(type, "agg.arg.tmp");
- Slot.setExternallyDestructed();
- EmitAggExpr(E, Slot);
- RValue RV = Slot.asRValue();
- args.add(RV, type);
+ if (HasAggregateEvalKind && args.isUsingInAlloca()) {
+ assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
+ AggValueSlot Slot = createPlaceholderSlot(*this, type);
+ Slot.setExternallyDestructed();
+ EmitAggExpr(E, Slot);
+ RValue RV = Slot.asRValue();
+ args.add(RV, type);
- pushDestroy(EHCleanup, RV.getAggregateAddr(), type, destroyCXXObject,
- /*useEHCleanupForArray*/ true);
+ const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
+ if (RD->hasNonTrivialDestructor()) {
+ // Create a no-op GEP between the placeholder and the cleanup so we can
+ // RAUW it successfully. It also serves as a marker of the first
+ // instruction where the cleanup is active.
+ pushFullExprCleanup<DestroyUnpassedArg>(EHCleanup, Slot.getAddr(), type);
// This unreachable is a temporary marker which will be removed later.
llvm::Instruction *IsActive = Builder.CreateUnreachable();
args.addArgCleanupDeactivation(EHStack.getInnermostEHScope(), IsActive);
- return;
}
+ return;
}
if (HasAggregateEvalKind && isa<ImplicitCastExpr>(E) &&
@@ -2128,6 +2388,7 @@
call->setCallingConv(getRuntimeCC());
Builder.CreateUnreachable();
}
+ PGO.setCurrentRegionUnreachable();
}
/// Emits a call or invoke instruction to the given nullary runtime
@@ -2209,9 +2470,7 @@
const FieldDecl *LargestFD = 0;
CharUnits UnionSize = CharUnits::Zero();
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
- const FieldDecl *FD = *i;
+ for (const auto *FD : RD->fields()) {
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
CharUnits FieldSize = getContext().getTypeSizeInChars(FD->getType());
@@ -2225,10 +2484,7 @@
ExpandTypeToArgs(LargestFD->getType(), FldRV, Args, IRFuncTy);
}
} else {
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
- FieldDecl *FD = *i;
-
+ for (const auto *FD : RD->fields()) {
RValue FldRV = EmitRValueForField(LV, FD, SourceLocation());
ExpandTypeToArgs(FD->getType(), FldRV, Args, IRFuncTy);
}
@@ -2251,6 +2507,20 @@
}
}
+/// \brief Store a non-aggregate value to an address to initialize it. For
+/// initialization, a non-atomic store will be used.
+static void EmitInitStoreOfNonAggregate(CodeGenFunction &CGF, RValue Src,
+ LValue Dst) {
+ if (Src.isScalar())
+ CGF.EmitStoreOfScalar(Src.getScalarVal(), Dst, /*init=*/true);
+ else
+ CGF.EmitStoreOfComplex(Src.getComplexVal(), Dst, /*init=*/true);
+}
+
+void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old,
+ llvm::Value *New) {
+ DeferredReplacements.push_back(std::make_pair(Old, New));
+}
RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *Callee,
@@ -2272,14 +2542,32 @@
cast<llvm::FunctionType>(
cast<llvm::PointerType>(Callee->getType())->getElementType());
+ // If we're using inalloca, insert the allocation after the stack save.
+ // FIXME: Do this earlier rather than hacking it in here!
+ llvm::Value *ArgMemory = 0;
+ if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
+ llvm::AllocaInst *AI = new llvm::AllocaInst(
+ ArgStruct, "argmem", CallArgs.getStackBase()->getNextNode());
+ AI->setUsedWithInAlloca(true);
+ assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
+ ArgMemory = AI;
+ }
+
// If the call returns a temporary with struct return, create a temporary
// alloca to hold the result, unless one is given to us.
- if (CGM.ReturnTypeUsesSRet(CallInfo)) {
- llvm::Value *Value = ReturnValue.getValue();
- if (!Value)
- Value = CreateMemTemp(RetTy);
- Args.push_back(Value);
- checkArgMatches(Value, IRArgNo, IRFuncTy);
+ llvm::Value *SRetPtr = 0;
+ if (CGM.ReturnTypeUsesSRet(CallInfo) || RetAI.isInAlloca()) {
+ SRetPtr = ReturnValue.getValue();
+ if (!SRetPtr)
+ SRetPtr = CreateMemTemp(RetTy);
+ if (CGM.ReturnTypeUsesSRet(CallInfo)) {
+ Args.push_back(SRetPtr);
+ checkArgMatches(SRetPtr, IRArgNo, IRFuncTy);
+ } else {
+ llvm::Value *Addr =
+ Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
+ Builder.CreateStore(SRetPtr, Addr);
+ }
}
assert(CallInfo.arg_size() == CallArgs.size() &&
@@ -2299,6 +2587,35 @@
}
switch (ArgInfo.getKind()) {
+ case ABIArgInfo::InAlloca: {
+ assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
+ if (RV.isAggregate()) {
+ // Replace the placeholder with the appropriate argument slot GEP.
+ llvm::Instruction *Placeholder =
+ cast<llvm::Instruction>(RV.getAggregateAddr());
+ CGBuilderTy::InsertPoint IP = Builder.saveIP();
+ Builder.SetInsertPoint(Placeholder);
+ llvm::Value *Addr = Builder.CreateStructGEP(
+ ArgMemory, ArgInfo.getInAllocaFieldIndex());
+ Builder.restoreIP(IP);
+ deferPlaceholderReplacement(Placeholder, Addr);
+ } else {
+ // Store the RValue into the argument struct.
+ llvm::Value *Addr =
+ Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
+ unsigned AS = Addr->getType()->getPointerAddressSpace();
+ llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS);
+ // There are some cases where a trivial bitcast is not avoidable. The
+ // definition of a type later in a translation unit may change its type
+ // from {}* to (%struct.foo*)*.
+ if (Addr->getType() != MemType)
+ Addr = Builder.CreateBitCast(Addr, MemType);
+ LValue argLV = MakeAddrLValue(Addr, I->Ty, TypeAlign);
+ EmitInitStoreOfNonAggregate(*this, RV, argLV);
+ }
+ break; // Don't increment IRArgNo!
+ }
+
case ABIArgInfo::Indirect: {
if (RV.isScalar() || RV.isComplex()) {
// Make a temporary alloca to pass the argument.
@@ -2307,13 +2624,8 @@
AI->setAlignment(ArgInfo.getIndirectAlign());
Args.push_back(AI);
- LValue argLV =
- MakeAddrLValue(Args.back(), I->Ty, TypeAlign);
-
- if (RV.isScalar())
- EmitStoreOfScalar(RV.getScalarVal(), argLV, /*init*/ true);
- else
- EmitStoreOfComplex(RV.getComplexVal(), argLV, /*init*/ true);
+ LValue argLV = MakeAddrLValue(Args.back(), I->Ty, TypeAlign);
+ EmitInitStoreOfNonAggregate(*this, RV, argLV);
// Validate argument match.
checkArgMatches(AI, IRArgNo, IRFuncTy);
@@ -2386,11 +2698,7 @@
if (RV.isScalar() || RV.isComplex()) {
SrcPtr = CreateMemTemp(I->Ty, "coerce");
LValue SrcLV = MakeAddrLValue(SrcPtr, I->Ty, TypeAlign);
- if (RV.isScalar()) {
- EmitStoreOfScalar(RV.getScalarVal(), SrcLV, /*init*/ true);
- } else {
- EmitStoreOfComplex(RV.getComplexVal(), SrcLV, /*init*/ true);
- }
+ EmitInitStoreOfNonAggregate(*this, RV, SrcLV);
} else
SrcPtr = RV.getAggregateAddr();
@@ -2456,6 +2764,34 @@
}
}
+ if (ArgMemory) {
+ llvm::Value *Arg = ArgMemory;
+ llvm::Type *LastParamTy =
+ IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1);
+ if (Arg->getType() != LastParamTy) {
+#ifndef NDEBUG
+ // Assert that these structs have equivalent element types.
+ llvm::StructType *FullTy = CallInfo.getArgStruct();
+ llvm::StructType *Prefix = cast<llvm::StructType>(
+ cast<llvm::PointerType>(LastParamTy)->getElementType());
+
+ // For variadic functions, the caller might supply a larger struct than
+ // the callee expects, and that's OK.
+ assert(Prefix->getNumElements() == FullTy->getNumElements() ||
+ (CallInfo.isVariadic() &&
+ Prefix->getNumElements() <= FullTy->getNumElements()));
+
+ for (llvm::StructType::element_iterator PI = Prefix->element_begin(),
+ PE = Prefix->element_end(),
+ FI = FullTy->element_begin();
+ PI != PE; ++PI, ++FI)
+ assert(*PI == *FI);
+#endif
+ Arg = Builder.CreateBitCast(Arg, LastParamTy);
+ }
+ Args.push_back(Arg);
+ }
+
if (!CallArgs.getCleanupsToDeactivate().empty())
deactivateArgCleanupsBeforeCall(*this, CallArgs);
@@ -2545,9 +2881,14 @@
if (CallArgs.hasWritebacks())
emitWritebacks(*this, CallArgs);
+ // The stack cleanup for inalloca arguments has to run out of the normal
+ // lexical order, so deactivate it and run it manually here.
+ CallArgs.freeArgumentMemory(*this);
+
switch (RetAI.getKind()) {
+ case ABIArgInfo::InAlloca:
case ABIArgInfo::Indirect:
- return convertTempToRValue(Args[0], RetTy, SourceLocation());
+ return convertTempToRValue(SRetPtr, RetTy, SourceLocation());
case ABIArgInfo::Ignore:
// If we are ignoring an argument that had a result, make sure to
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index 532cb59..2e43d1d 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -56,6 +56,8 @@
class CallArgList :
public SmallVector<CallArg, 16> {
public:
+ CallArgList() : StackBase(0), StackBaseMem(0) {}
+
struct Writeback {
/// The original argument. Note that the argument l-value
/// is potentially null.
@@ -97,9 +99,12 @@
bool hasWritebacks() const { return !Writebacks.empty(); }
- typedef SmallVectorImpl<Writeback>::const_iterator writeback_iterator;
- writeback_iterator writeback_begin() const { return Writebacks.begin(); }
- writeback_iterator writeback_end() const { return Writebacks.end(); }
+ typedef llvm::iterator_range<SmallVectorImpl<Writeback>::const_iterator>
+ writeback_const_range;
+
+ writeback_const_range writebacks() const {
+ return writeback_const_range(Writebacks.begin(), Writebacks.end());
+ }
void addArgCleanupDeactivation(EHScopeStack::stable_iterator Cleanup,
llvm::Instruction *IsActiveIP) {
@@ -113,6 +118,14 @@
return CleanupsToDeactivate;
}
+ void allocateArgumentMemory(CodeGenFunction &CGF);
+ llvm::Instruction *getStackBase() const { return StackBase; }
+ void freeArgumentMemory(CodeGenFunction &CGF) const;
+
+ /// \brief Returns whether we're using an inalloca struct to pass arguments
+ /// in memory.
+ bool isUsingInAlloca() const { return StackBase; }
+
private:
SmallVector<Writeback, 1> Writebacks;
@@ -120,6 +133,17 @@
/// is used to cleanup objects that are owned by the callee once the call
/// occurs.
SmallVector<CallArgCleanup, 1> CleanupsToDeactivate;
+
+ /// The stacksave call. It dominates all of the argument evaluation.
+ llvm::CallInst *StackBase;
+
+ /// The alloca holding the stackbase. We need it to maintain SSA form.
+ llvm::AllocaInst *StackBaseMem;
+
+ /// The iterator pointing to the stack restore cleanup. We manually run and
+ /// deactivate this cleanup after the call in the unexceptional case because
+ /// it doesn't run in the normal order.
+ EHScopeStack::stable_iterator StackCleanup;
};
/// FunctionArgList - Type for representing both the decl and type
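A rough picture of the protocol these new members support in CGCall.cpp; the condition and the CGF variable are placeholders, not a literal excerpt:

    CallArgList Args;
    if (/* some argument is passed in the inalloca block */)
      Args.allocateArgumentMemory(CGF);   // llvm.stacksave + EH stackrestore cleanup
    // ... evaluate the arguments right to left, filling Args ...
    // ... emit the call, passing the inalloca block as the last IR argument ...
    Args.freeArgumentMemory(CGF);         // deactivate the cleanup, llvm.stackrestore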
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 4848d75..071f8b3 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "CGBlocks.h"
+#include "CGCXXABI.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
-#include "CGCXXABI.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/EvaluatedExprVisitor.h"
@@ -342,7 +342,7 @@
CallBaseDtor(const CXXRecordDecl *Base, bool BaseIsVirtual)
: BaseClass(Base), BaseIsVirtual(BaseIsVirtual) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
const CXXRecordDecl *DerivedClass =
cast<CXXMethodDecl>(CGF.CurCodeDecl)->getParent();
@@ -549,10 +549,8 @@
// If we are initializing an anonymous union field, drill down to
// the field.
IndirectFieldDecl *IndirectField = MemberInit->getIndirectMember();
- IndirectFieldDecl::chain_iterator I = IndirectField->chain_begin(),
- IEnd = IndirectField->chain_end();
- for ( ; I != IEnd; ++I)
- LHS = CGF.EmitLValueForFieldInitialization(LHS, cast<FieldDecl>(*I));
+ for (const auto *I : IndirectField->chain())
+ LHS = CGF.EmitLValueForFieldInitialization(LHS, cast<FieldDecl>(I));
FieldType = MemberInit->getIndirectMember()->getAnonField()->getType();
} else {
LHS = CGF.EmitLValueForFieldInitialization(LHS, Field);
@@ -699,6 +697,10 @@
const CXXConstructorDecl *Ctor = cast<CXXConstructorDecl>(CurGD.getDecl());
CXXCtorType CtorType = CurGD.getCtorType();
+ assert((CGM.getTarget().getCXXABI().hasConstructorVariants() ||
+ CtorType == Ctor_Complete) &&
+ "can only generate complete ctor for this ABI");
+
// Before we go any further, try the complete->base constructor
// delegation optimization.
if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor) &&
@@ -717,6 +719,9 @@
if (IsTryBody)
EnterCXXTryStmt(*cast<CXXTryStmt>(Body), true);
+ RegionCounter Cnt = getPGORegionCounter(Body);
+ Cnt.beginRegion(Builder);
+
RunCleanupsScope RunCleanups(*this);
// TODO: in restricted cases, we can emit the vbase initializers of
@@ -1190,23 +1195,17 @@
return false;
// Check fields.
- for (CXXRecordDecl::field_iterator I = BaseClassDecl->field_begin(),
- E = BaseClassDecl->field_end(); I != E; ++I) {
- const FieldDecl *Field = *I;
-
+ for (const auto *Field : BaseClassDecl->fields())
if (!FieldHasTrivialDestructorBody(Context, Field))
return false;
- }
// Check non-virtual bases.
- for (CXXRecordDecl::base_class_const_iterator I =
- BaseClassDecl->bases_begin(), E = BaseClassDecl->bases_end();
- I != E; ++I) {
- if (I->isVirtual())
+ for (const auto &I : BaseClassDecl->bases()) {
+ if (I.isVirtual())
continue;
const CXXRecordDecl *NonVirtualBase =
- cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
if (!HasTrivialDestructorBody(Context, NonVirtualBase,
MostDerivedClassDecl))
return false;
@@ -1214,11 +1213,9 @@
if (BaseClassDecl == MostDerivedClassDecl) {
// Check virtual bases.
- for (CXXRecordDecl::base_class_const_iterator I =
- BaseClassDecl->vbases_begin(), E = BaseClassDecl->vbases_end();
- I != E; ++I) {
+ for (const auto &I : BaseClassDecl->vbases()) {
const CXXRecordDecl *VirtualBase =
- cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
if (!HasTrivialDestructorBody(Context, VirtualBase,
MostDerivedClassDecl))
return false;
@@ -1251,13 +1248,9 @@
// Check the fields.
const CXXRecordDecl *ClassDecl = Dtor->getParent();
- for (CXXRecordDecl::field_iterator I = ClassDecl->field_begin(),
- E = ClassDecl->field_end(); I != E; ++I) {
- const FieldDecl *Field = *I;
-
+ for (const auto *Field : ClassDecl->fields())
if (!FieldHasTrivialDestructorBody(Context, Field))
return false;
- }
return true;
}
@@ -1315,6 +1308,9 @@
case Dtor_Base:
assert(Body);
+ RegionCounter Cnt = getPGORegionCounter(Body);
+ Cnt.beginRegion(Builder);
+
// Enter the cleanup scopes for fields and non-virtual bases.
EnterDtorCleanups(Dtor, Dtor_Base);
@@ -1355,11 +1351,8 @@
LexicalScope Scope(*this, RootCS->getSourceRange());
AssignmentMemcpyizer AM(*this, AssignOp, Args);
- for (CompoundStmt::const_body_iterator I = RootCS->body_begin(),
- E = RootCS->body_end();
- I != E; ++I) {
- AM.emitAssignment(*I);
- }
+ for (auto *I : RootCS->body())
+ AM.emitAssignment(I);
AM.finish();
}
@@ -1368,7 +1361,7 @@
struct CallDtorDelete : EHScopeStack::Cleanup {
CallDtorDelete() {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CGF.CurCodeDecl);
const CXXRecordDecl *ClassDecl = Dtor->getParent();
CGF.EmitDeleteCall(Dtor->getOperatorDelete(), CGF.LoadCXXThis(),
@@ -1384,7 +1377,7 @@
assert(ShouldDeleteCondition != NULL);
}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::BasicBlock *callDeleteBB = CGF.createBasicBlock("dtor.call_delete");
llvm::BasicBlock *continueBB = CGF.createBasicBlock("dtor.continue");
llvm::Value *ShouldCallDelete
@@ -1413,7 +1406,7 @@
: field(field), destroyer(destroyer),
useEHCleanupForArray(useEHCleanupForArray) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
// Find the address of the field.
llvm::Value *thisValue = CGF.LoadCXXThis();
QualType RecordTy = CGF.getContext().getTagDeclType(field->getParent());
@@ -1427,7 +1420,7 @@
};
}
-/// EmitDtorEpilogue - Emit all code that comes at the end of class's
+/// \brief Emit all code that comes at the end of the class's
/// destructor. This is to call destructors on members and base classes
/// in reverse order of their construction.
void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD,
@@ -1439,7 +1432,7 @@
// operator delete that Sema picked up.
if (DtorType == Dtor_Deleting) {
assert(DD->getOperatorDelete() &&
- "operator delete missing - EmitDtorEpilogue");
+ "operator delete missing - EnterDtorCleanups");
if (CXXStructorImplicitParamValue) {
// If there is an implicit param to the deleting dtor, it's a boolean
// telling whether we should call delete at the end of the dtor.
@@ -1462,10 +1455,7 @@
// We push them in the forward order so that they'll be popped in
// the reverse order.
- for (CXXRecordDecl::base_class_const_iterator I =
- ClassDecl->vbases_begin(), E = ClassDecl->vbases_end();
- I != E; ++I) {
- const CXXBaseSpecifier &Base = *I;
+ for (const auto &Base : ClassDecl->vbases()) {
CXXRecordDecl *BaseClassDecl
= cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl());
@@ -1484,10 +1474,7 @@
assert(DtorType == Dtor_Base);
// Destroy non-virtual bases.
- for (CXXRecordDecl::base_class_const_iterator I =
- ClassDecl->bases_begin(), E = ClassDecl->bases_end(); I != E; ++I) {
- const CXXBaseSpecifier &Base = *I;
-
+ for (const auto &Base : ClassDecl->bases()) {
// Ignore virtual bases.
if (Base.isVirtual())
continue;
@@ -1504,11 +1491,8 @@
}
// Destroy direct fields.
- SmallVector<const FieldDecl *, 16> FieldDecls;
- for (CXXRecordDecl::field_iterator I = ClassDecl->field_begin(),
- E = ClassDecl->field_end(); I != E; ++I) {
- const FieldDecl *field = *I;
- QualType type = field->getType();
+ for (const auto *Field : ClassDecl->fields()) {
+ QualType type = Field->getType();
QualType::DestructionKind dtorKind = type.isDestructedType();
if (!dtorKind) continue;
@@ -1517,7 +1501,7 @@
if (RT && RT->getDecl()->isAnonymousStructOrUnion()) continue;
CleanupKind cleanupKind = getCleanupKind(dtorKind);
- EHStack.pushCleanup<DestroyField>(cleanupKind, field,
+ EHStack.pushCleanup<DestroyField>(cleanupKind, Field,
getDestroyer(dtorKind),
cleanupKind & EHCleanup);
}
@@ -1683,9 +1667,31 @@
return;
}
- // Non-trivial constructors are handled in an ABI-specific manner.
- CGM.getCXXABI().EmitConstructorCall(*this, D, Type, ForVirtualBase,
- Delegating, This, ArgBeg, ArgEnd);
+ // C++11 [class.mfct.non-static]p2:
+ // If a non-static member function of a class X is called for an object that
+ // is not of type X, or of a type derived from X, the behavior is undefined.
+ // FIXME: Provide a source location here.
+ EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, SourceLocation(), This,
+ getContext().getRecordType(D->getParent()));
+
+ CallArgList Args;
+
+ // Push the this ptr.
+ Args.add(RValue::get(This), D->getThisType(getContext()));
+
+ // Add the rest of the user-supplied arguments.
+ const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
+ EmitCallArgs(Args, FPT, ArgBeg, ArgEnd);
+
+ // Insert any ABI-specific implicit constructor arguments.
+ unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs(
+ *this, D, Type, ForVirtualBase, Delegating, Args);
+
+ // Emit the call.
+ llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(D, Type);
+ const CGFunctionInfo &Info =
+ CGM.getTypes().arrangeCXXConstructorCall(Args, D, Type, ExtraArgs);
+ EmitCall(Info, Callee, ReturnValueSlot(), Args, D);
}
void
@@ -1704,38 +1710,23 @@
assert(D->isInstance() &&
"Trying to emit a member call expr on a static method!");
- const FunctionProtoType *FPT = D->getType()->getAs<FunctionProtoType>();
+ const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
CallArgList Args;
// Push the this ptr.
Args.add(RValue::get(This), D->getThisType(getContext()));
-
// Push the src ptr.
- QualType QT = *(FPT->arg_type_begin());
+ QualType QT = *(FPT->param_type_begin());
llvm::Type *t = CGM.getTypes().ConvertType(QT);
Src = Builder.CreateBitCast(Src, t);
Args.add(RValue::get(Src), QT);
-
+
// Skip over first argument (Src).
- ++ArgBeg;
- CallExpr::const_arg_iterator Arg = ArgBeg;
- for (FunctionProtoType::arg_type_iterator I = FPT->arg_type_begin()+1,
- E = FPT->arg_type_end(); I != E; ++I, ++Arg) {
- assert(Arg != ArgEnd && "Running over edge of argument list!");
- EmitCallArg(Args, *Arg, *I);
- }
- // Either we've emitted all the call args, or we have a call to a
- // variadic function.
- assert((Arg == ArgEnd || FPT->isVariadic()) &&
- "Extra arguments in non-variadic function!");
- // If we still have any arguments, emit them using the type of the argument.
- for (; Arg != ArgEnd; ++Arg) {
- QualType ArgType = Arg->getType();
- EmitCallArg(Args, *Arg, ArgType);
- }
-
+ EmitCallArgs(Args, FPT->isVariadic(), FPT->param_type_begin() + 1,
+ FPT->param_type_end(), ArgBeg + 1, ArgEnd);
+
EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, RequiredArgs::All),
Callee, ReturnValueSlot(), Args, D);
}
@@ -1790,7 +1781,7 @@
CXXDtorType Type)
: Dtor(D), Addr(Addr), Type(Type) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false,
/*Delegating=*/true, Addr);
}
@@ -1830,23 +1821,8 @@
bool ForVirtualBase,
bool Delegating,
llvm::Value *This) {
- GlobalDecl GD(DD, Type);
- llvm::Value *VTT = GetVTTParameter(GD, ForVirtualBase, Delegating);
- llvm::Value *Callee = 0;
- if (getLangOpts().AppleKext)
- Callee = BuildAppleKextVirtualDestructorCall(DD, Type,
- DD->getParent());
-
- if (!Callee)
- Callee = CGM.GetAddrOfCXXDestructor(DD, Type);
-
- if (DD->isVirtual())
- This = CGM.getCXXABI().adjustThisArgumentForVirtualCall(*this, GD, This);
-
- // FIXME: Provide a source location here.
- EmitCXXMemberCall(DD, SourceLocation(), Callee, ReturnValueSlot(), This,
- VTT, getContext().getPointerType(getContext().VoidPtrTy),
- 0, 0);
+ CGM.getCXXABI().EmitDestructorCall(*this, DD, Type, ForVirtualBase,
+ Delegating, This);
}
namespace {
@@ -1857,7 +1833,7 @@
CallLocalDtor(const CXXDestructorDecl *D, llvm::Value *Addr)
: Dtor(D), Addr(Addr) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
/*ForVirtualBase=*/false,
/*Delegating=*/false, Addr);
@@ -1944,10 +1920,9 @@
const CXXRecordDecl *RD = Base.getBase();
// Traverse bases.
- for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(),
- E = RD->bases_end(); I != E; ++I) {
+ for (const auto &I : RD->bases()) {
CXXRecordDecl *BaseDecl
- = cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl());
+ = cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
// Ignore classes without a vtable.
if (!BaseDecl->isDynamicClass())
@@ -1957,7 +1932,7 @@
CharUnits BaseOffsetFromNearestVBase;
bool BaseDeclIsNonVirtualPrimaryBase;
- if (I->isVirtual()) {
+ if (I.isVirtual()) {
// Check if we've visited this virtual base before.
if (!VBases.insert(BaseDecl))
continue;
@@ -1978,7 +1953,7 @@
}
InitializeVTablePointers(BaseSubobject(BaseDecl, BaseOffset),
- I->isVirtual() ? BaseDecl : NearestVBase,
+ I.isVirtual() ? BaseDecl : NearestVBase,
BaseOffsetFromNearestVBase,
BaseDeclIsNonVirtualPrimaryBase,
VTableClass, VBases);
@@ -2127,7 +2102,7 @@
// Prepare the return slot.
const FunctionProtoType *FPT =
callOperator->getType()->castAs<FunctionProtoType>();
- QualType resultType = FPT->getResultType();
+ QualType resultType = FPT->getReturnType();
ReturnValueSlot returnSlot;
if (!resultType->isVoidType() &&
calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect &&
@@ -2162,11 +2137,9 @@
CallArgs.add(RValue::get(ThisPtr), ThisType);
// Add the rest of the parameters.
- for (BlockDecl::param_const_iterator I = BD->param_begin(),
- E = BD->param_end(); I != E; ++I) {
- ParmVarDecl *param = *I;
+ for (auto param : BD->params())
EmitDelegateCallArg(CallArgs, param, param->getLocStart());
- }
+
assert(!Lambda->isGenericLambda() &&
"generic lambda interconversion to block not implemented");
EmitForwardingCallToLambda(Lambda->getLambdaCallOperator(), CallArgs);
@@ -2194,11 +2167,9 @@
CallArgs.add(RValue::get(ThisPtr), ThisType);
// Add the rest of the parameters.
- for (FunctionDecl::param_const_iterator I = MD->param_begin(),
- E = MD->param_end(); I != E; ++I) {
- ParmVarDecl *param = *I;
- EmitDelegateCallArg(CallArgs, param, param->getLocStart());
- }
+ for (auto Param : MD->params())
+ EmitDelegateCallArg(CallArgs, Param, Param->getLocStart());
+
const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
// For a generic lambda, find the corresponding call operator specialization
// to which the call to the static-invoker shall be forwarded.
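The hunks above repeatedly convert explicit iterator loops (bases(), vbases(), fields(), params()) into range-based for loops. A minimal standalone sketch of that rewrite follows; the Base struct and collect() helper are illustrative stand-ins, not Clang APIs.

    // Loop modernization pattern: the begin()/end() iterator loop becomes a
    // range-based for over the same container. Compile with -std=c++11.
    #include <iostream>
    #include <string>
    #include <vector>

    struct Base {
      std::string Name;
      bool Virtual;
    };

    static void collect(const std::vector<Base> &Bases) {
      // Old style: explicit iterator pair, dereferenced through I->.
      for (std::vector<Base>::const_iterator I = Bases.begin(), E = Bases.end();
           I != E; ++I) {
        if (I->Virtual)
          std::cout << "virtual base: " << I->Name << "\n";
      }

      // New style: range-based for, as the patch does with ClassDecl->bases()
      // and ClassDecl->vbases().
      for (const auto &B : Bases) {
        if (!B.Virtual)
          std::cout << "non-virtual base: " << B.Name << "\n";
      }
    }

    int main() {
      collect({{"A", false}, {"B", true}});
      return 0;
    }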
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 65de4d4..8748224 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -528,7 +528,7 @@
llvm::BasicBlock *unreachableBB = CGF.getUnreachableBlock();
for (llvm::BasicBlock::use_iterator
i = entry->use_begin(), e = entry->use_end(); i != e; ) {
- llvm::Use &use = i.getUse();
+ llvm::Use &use = *i;
++i;
use.set(unreachableBB);
@@ -860,7 +860,9 @@
// Emit the EH cleanup if required.
if (RequiresEHCleanup) {
- if (CGDebugInfo *DI = getDebugInfo())
+ CGDebugInfo *DI = getDebugInfo();
+ SaveAndRestoreLocation AutoRestoreLocation(*this, Builder);
+ if (DI)
DI->EmitLocation(Builder, CurEHLocation);
CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h
index 1bd6bba..fed75bc 100644
--- a/lib/CodeGen/CGCleanup.h
+++ b/lib/CodeGen/CGCleanup.h
@@ -194,6 +194,15 @@
return getHandlers()[I];
}
+ // Clear all handler blocks.
+ // FIXME: it's better to always call clearHandlerBlocks in DTOR and have a
+ // 'takeHandler' or some such function which removes ownership from the
+ // EHCatchScope object if the handlers should live longer than EHCatchScope.
+ void clearHandlerBlocks() {
+ for (unsigned I = 0, N = getNumHandlers(); I != N; ++I)
+ delete getHandler(I).Block;
+ }
+
typedef const Handler *iterator;
iterator begin() const { return getHandlers(); }
iterator end() const { return getHandlers() + getNumHandlers(); }
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index fcb26f0..0e94b51 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -37,7 +37,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Support/Path.h"
using namespace clang;
using namespace clang::CodeGen;
@@ -52,30 +52,35 @@
"Region stack mismatch, stack not empty!");
}
-
-NoLocation::NoLocation(CodeGenFunction &CGF, CGBuilderTy &B)
- : DI(CGF.getDebugInfo()), Builder(B) {
+SaveAndRestoreLocation::SaveAndRestoreLocation(CodeGenFunction &CGF,
+ CGBuilderTy &B)
+ : DI(CGF.getDebugInfo()), Builder(B) {
if (DI) {
SavedLoc = DI->getLocation();
DI->CurLoc = SourceLocation();
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
}
}
+SaveAndRestoreLocation::~SaveAndRestoreLocation() {
+ if (DI)
+ DI->EmitLocation(Builder, SavedLoc);
+}
+
+NoLocation::NoLocation(CodeGenFunction &CGF, CGBuilderTy &B)
+ : SaveAndRestoreLocation(CGF, B) {
+ if (DI)
+ Builder.SetCurrentDebugLocation(llvm::DebugLoc());
+}
+
NoLocation::~NoLocation() {
- if (DI) {
+ if (DI)
assert(Builder.getCurrentDebugLocation().isUnknown());
- DI->CurLoc = SavedLoc;
- }
}
ArtificialLocation::ArtificialLocation(CodeGenFunction &CGF, CGBuilderTy &B)
- : DI(CGF.getDebugInfo()), Builder(B) {
- if (DI) {
- SavedLoc = DI->getLocation();
- DI->CurLoc = SourceLocation();
+ : SaveAndRestoreLocation(CGF, B) {
+ if (DI)
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
- }
}
void ArtificialLocation::Emit() {
@@ -91,10 +96,8 @@
}
ArtificialLocation::~ArtificialLocation() {
- if (DI) {
+ if (DI)
assert(Builder.getCurrentDebugLocation().getLine() == 0);
- DI->CurLoc = SavedLoc;
- }
}
void CGDebugInfo::setLocation(SourceLocation Loc) {
@@ -225,34 +228,20 @@
/// getClassName - Get class name including template argument list.
StringRef
CGDebugInfo::getClassName(const RecordDecl *RD) {
- const ClassTemplateSpecializationDecl *Spec
- = dyn_cast<ClassTemplateSpecializationDecl>(RD);
- if (!Spec)
+ // Quick optimization to avoid having to intern strings that are already
+ // stored reliably elsewhere.
+ if (!isa<ClassTemplateSpecializationDecl>(RD))
return RD->getName();
- const TemplateArgument *Args;
- unsigned NumArgs;
- if (TypeSourceInfo *TAW = Spec->getTypeAsWritten()) {
- const TemplateSpecializationType *TST =
- cast<TemplateSpecializationType>(TAW->getType());
- Args = TST->getArgs();
- NumArgs = TST->getNumArgs();
- } else {
- const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
- Args = TemplateArgs.data();
- NumArgs = TemplateArgs.size();
- }
- StringRef Name = RD->getIdentifier()->getName();
- PrintingPolicy Policy(CGM.getLangOpts());
- SmallString<128> TemplateArgList;
+ SmallString<128> Name;
{
- llvm::raw_svector_ostream OS(TemplateArgList);
- TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, NumArgs,
- Policy);
+ llvm::raw_svector_ostream OS(Name);
+ RD->getNameForDiagnostic(OS, CGM.getContext().getPrintingPolicy(),
+ /*Qualified*/ false);
}
// Copy this name on the side and use its reference.
- return internString(Name, TemplateArgList);
+ return internString(Name);
}
/// getOrCreateFile - Get the file debug info descriptor for the input location.
@@ -342,9 +331,9 @@
if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
MainFileDir = MainFile->getDir()->getName();
if (MainFileDir != ".") {
- llvm::SmallString<1024> MainFileDirSS(MainFileDir);
- llvm::sys::path::append(MainFileDirSS, MainFileName);
- MainFileName = MainFileDirSS.str();
+ llvm::SmallString<1024> MainFileDirSS(MainFileDir);
+ llvm::sys::path::append(MainFileDirSS, MainFileName);
+ MainFileName = MainFileDirSS.str();
}
}
@@ -379,10 +368,12 @@
// Create new compile unit.
// FIXME - Eliminate TheCU.
- TheCU = DBuilder.createCompileUnit(LangTag, Filename, getCurrentDirname(),
- Producer, LO.Optimize,
- CGM.getCodeGenOpts().DwarfDebugFlags,
- RuntimeVers, SplitDwarfFilename);
+ TheCU = DBuilder.createCompileUnit(
+ LangTag, Filename, getCurrentDirname(), Producer, LO.Optimize,
+ CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, SplitDwarfFilename,
+ DebugKind == CodeGenOptions::DebugLineTablesOnly
+ ? llvm::DIBuilder::LineTablesOnly
+ : llvm::DIBuilder::FullDebug);
}
/// CreateType - Get the Basic type from the cache or create a new
@@ -736,14 +727,16 @@
return llvm::DIType();
// We don't set size information, but do specify where the typedef was
// declared.
- unsigned Line = getLineNumber(Ty->getDecl()->getLocation());
+ SourceLocation Loc = Ty->getDecl()->getLocation();
+ llvm::DIFile File = getOrCreateFile(Loc);
+ unsigned Line = getLineNumber(Loc);
const TypedefNameDecl *TyDecl = Ty->getDecl();
llvm::DIDescriptor TypedefContext =
getContextDescriptor(cast<Decl>(Ty->getDecl()->getDeclContext()));
return
- DBuilder.createTypedef(Src, TyDecl->getName(), Unit, Line, TypedefContext);
+ DBuilder.createTypedef(Src, TyDecl->getName(), File, Line, TypedefContext);
}
llvm::DIType CGDebugInfo::CreateType(const FunctionType *Ty,
@@ -751,15 +744,17 @@
SmallVector<llvm::Value *, 16> EltTys;
// Add the result type at least.
- EltTys.push_back(getOrCreateType(Ty->getResultType(), Unit));
+ EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit));
// Set up remainder of arguments if there is a prototype.
- // FIXME: IF NOT, HOW IS THIS REPRESENTED? llvm-gcc doesn't represent '...'!
+ // Otherwise, emit it as a variadic function.
if (isa<FunctionNoProtoType>(Ty))
EltTys.push_back(DBuilder.createUnspecifiedParameter());
else if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(Ty)) {
- for (unsigned i = 0, e = FPT->getNumArgs(); i != e; ++i)
- EltTys.push_back(getOrCreateType(FPT->getArgType(i), Unit));
+ for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i)
+ EltTys.push_back(getOrCreateType(FPT->getParamType(i), Unit));
+ if (FPT->isVariadic())
+ EltTys.push_back(DBuilder.createUnspecifiedParameter());
}
llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(EltTys);
@@ -784,7 +779,7 @@
uint64_t sizeInBits = 0;
unsigned alignInBits = 0;
if (!type->isIncompleteArrayType()) {
- llvm::tie(sizeInBits, alignInBits) = CGM.getContext().getTypeInfo(type);
+ std::tie(sizeInBits, alignInBits) = CGM.getContext().getTypeInfo(type);
if (sizeInBitsOverride)
sizeInBits = sizeInBitsOverride;
@@ -926,9 +921,8 @@
// Static and non-static members should appear in the same order as
// the corresponding declarations in the source program.
- for (RecordDecl::decl_iterator I = record->decls_begin(),
- E = record->decls_end(); I != E; ++I)
- if (const VarDecl *V = dyn_cast<VarDecl>(*I)) {
+ for (const auto *I : record->decls())
+ if (const auto *V = dyn_cast<VarDecl>(I)) {
// Reuse the existing static member declaration if one exists
llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator MI =
StaticDataMemberCache.find(V->getCanonicalDecl());
@@ -939,7 +933,7 @@
llvm::DIDerivedType(cast<llvm::MDNode>(MI->second)));
} else
elements.push_back(CreateRecordStaticField(V, RecordTy));
- } else if (FieldDecl *field = dyn_cast<FieldDecl>(*I)) {
+ } else if (const auto *field = dyn_cast<FieldDecl>(I)) {
CollectRecordNormalField(field, layout.getFieldOffset(fieldNo),
tunit, elements, RecordTy);
@@ -1005,7 +999,13 @@
llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(Elts);
- return DBuilder.createSubroutineType(Unit, EltTypeArray);
+ unsigned Flags = 0;
+ if (Func->getExtProtoInfo().RefQualifier == RQ_LValue)
+ Flags |= llvm::DIDescriptor::FlagLValueReference;
+ if (Func->getExtProtoInfo().RefQualifier == RQ_RValue)
+ Flags |= llvm::DIDescriptor::FlagRValueReference;
+
+ return DBuilder.createSubroutineType(Unit, EltTypeArray, Flags);
}
/// isFunctionLocalClass - Return true if CXXRecordDecl is defined
@@ -1084,6 +1084,10 @@
}
if (Method->hasPrototype())
Flags |= llvm::DIDescriptor::FlagPrototyped;
+ if (Method->getRefQualifier() == RQ_LValue)
+ Flags |= llvm::DIDescriptor::FlagLValueReference;
+ if (Method->getRefQualifier() == RQ_RValue)
+ Flags |= llvm::DIDescriptor::FlagRValueReference;
llvm::DIArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
llvm::DISubprogram SP =
@@ -1111,9 +1115,8 @@
// Since we want more than just the individual member decls if we
// have templated functions iterate over every declaration to gather
// the functions.
- for(DeclContext::decl_iterator I = RD->decls_begin(),
- E = RD->decls_end(); I != E; ++I) {
- if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(*I)) {
+ for(const auto *I : RD->decls()) {
+ if (const auto *Method = dyn_cast<CXXMethodDecl>(I)) {
// Reuse the existing member function declaration if it exists.
// It may be associated with the declaration of the type & should be
// reused as we're building the definition.
@@ -1130,16 +1133,13 @@
EltTys.push_back(CreateCXXMemberFunction(Method, Unit, RecordTy));
} else
EltTys.push_back(MI->second);
- } else if (const FunctionTemplateDecl *FTD =
- dyn_cast<FunctionTemplateDecl>(*I)) {
+ } else if (const auto *FTD = dyn_cast<FunctionTemplateDecl>(I)) {
// Add any template specializations that have already been seen. Like
// implicit member functions, these may have been added to a declaration
// in the case of vtable-based debug info reduction.
- for (FunctionTemplateDecl::spec_iterator SI = FTD->spec_begin(),
- SE = FTD->spec_end();
- SI != SE; ++SI) {
+ for (const auto *SI : FTD->specializations()) {
llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator MI =
- SPCache.find(cast<CXXMethodDecl>(*SI)->getCanonicalDecl());
+ SPCache.find(cast<CXXMethodDecl>(SI)->getCanonicalDecl());
if (MI != SPCache.end())
EltTys.push_back(MI->second);
}
@@ -1156,15 +1156,14 @@
llvm::DIType RecordTy) {
const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
- for (CXXRecordDecl::base_class_const_iterator BI = RD->bases_begin(),
- BE = RD->bases_end(); BI != BE; ++BI) {
+ for (const auto &BI : RD->bases()) {
unsigned BFlags = 0;
uint64_t BaseOffset;
const CXXRecordDecl *Base =
- cast<CXXRecordDecl>(BI->getType()->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl());
- if (BI->isVirtual()) {
+ if (BI.isVirtual()) {
// virtual base offset offset is -ve. The code generator emits dwarf
// expression where it expects +ve number.
BaseOffset =
@@ -1176,7 +1175,7 @@
// FIXME: Inconsistent units for BaseOffset. It is in bytes when
// BI->isVirtual() and bits when not.
- AccessSpecifier Access = BI->getAccessSpecifier();
+ AccessSpecifier Access = BI.getAccessSpecifier();
if (Access == clang::AS_private)
BFlags |= llvm::DIDescriptor::FlagPrivate;
else if (Access == clang::AS_protected)
@@ -1184,7 +1183,7 @@
llvm::DIType DTy =
DBuilder.createInheritance(RecordTy,
- getOrCreateType(BI->getType(), Unit),
+ getOrCreateType(BI.getType(), Unit),
BaseOffset, BFlags);
EltTys.push_back(DTy);
}
@@ -1239,7 +1238,7 @@
V = CGM.GetAddrOfFunction(FD);
// Member data pointers have special handling too to compute the fixed
// offset within the object.
- if (isa<FieldDecl>(D)) {
+ if (isa<FieldDecl>(D) || isa<IndirectFieldDecl>(D)) {
// These five lines (& possibly the above member function pointer
// handling) might be able to be refactored to use similar code in
// CodeGenModule::getMemberPointerConstant
@@ -1416,6 +1415,9 @@
}
void CGDebugInfo::completeRequiredType(const RecordDecl *RD) {
+ if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ return;
+
if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD))
if (CXXDecl->isDynamicClass())
return;
@@ -1439,32 +1441,57 @@
TypeCache[TyPtr] = Res;
}
+static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I,
+ CXXRecordDecl::method_iterator End) {
+ for (; I != End; ++I)
+ if (FunctionDecl *Tmpl = I->getInstantiatedFromMemberFunction())
+ if (!Tmpl->isImplicit() && Tmpl->isThisDeclarationADefinition() &&
+ !I->getMemberSpecializationInfo()->isExplicitSpecialization())
+ return true;
+ return false;
+}
+
+static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind,
+ const RecordDecl *RD,
+ const LangOptions &LangOpts) {
+ if (DebugKind > CodeGenOptions::LimitedDebugInfo)
+ return false;
+
+ if (!LangOpts.CPlusPlus)
+ return false;
+
+ if (!RD->isCompleteDefinitionRequired())
+ return true;
+
+ const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD);
+
+ if (!CXXDecl)
+ return false;
+
+ if (CXXDecl->hasDefinition() && CXXDecl->isDynamicClass())
+ return true;
+
+ TemplateSpecializationKind Spec = TSK_Undeclared;
+ if (const ClassTemplateSpecializationDecl *SD =
+ dyn_cast<ClassTemplateSpecializationDecl>(RD))
+ Spec = SD->getSpecializationKind();
+
+ if (Spec == TSK_ExplicitInstantiationDeclaration &&
+ hasExplicitMemberDefinition(CXXDecl->method_begin(),
+ CXXDecl->method_end()))
+ return true;
+
+ return false;
+}
+
/// CreateType - get structure or union type.
llvm::DIType CGDebugInfo::CreateType(const RecordType *Ty) {
RecordDecl *RD = Ty->getDecl();
- const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD);
- // Always emit declarations for types that aren't required to be complete when
- // in limit-debug-info mode. If the type is later found to be required to be
- // complete this declaration will be upgraded to a definition by
- // `completeRequiredType`.
- // If the type is dynamic, only emit the definition in TUs that require class
- // data. This is handled by `completeClassData`.
llvm::DICompositeType T(getTypeOrNull(QualType(Ty, 0)));
- // If we've already emitted the type, just use that, even if it's only a
- // declaration. The completeType, completeRequiredType, and completeClassData
- // callbacks will handle promoting the declaration to a definition.
- if (T ||
- (DebugKind <= CodeGenOptions::LimitedDebugInfo &&
- // Under -flimit-debug-info, emit only a declaration unless the type is
- // required to be complete.
- !RD->isCompleteDefinitionRequired() && CGM.getLangOpts().CPlusPlus) ||
- // If the class is dynamic, only emit a declaration. A definition will be
- // emitted whenever the vtable is emitted.
- (CXXDecl && CXXDecl->hasDefinition() && CXXDecl->isDynamicClass()) || T) {
- llvm::DIDescriptor FDContext =
- getContextDescriptor(cast<Decl>(RD->getDeclContext()));
+ if (T || shouldOmitDefinition(DebugKind, RD, CGM.getLangOpts())) {
if (!T)
- T = getOrCreateRecordFwdDecl(Ty, FDContext);
+ T = getOrCreateRecordFwdDecl(
+ Ty, getContextDescriptor(cast<Decl>(RD->getDeclContext())));
return T;
}
@@ -1625,9 +1652,7 @@
}
// Create entries for all of the properties.
- for (ObjCContainerDecl::prop_iterator I = ID->prop_begin(),
- E = ID->prop_end(); I != E; ++I) {
- const ObjCPropertyDecl *PD = *I;
+ for (const auto *PD : ID->properties()) {
SourceLocation Loc = PD->getLocation();
llvm::DIFile PUnit = getOrCreateFile(Loc);
unsigned PLine = getLineNumber(Loc);
@@ -1829,11 +1854,13 @@
if (!Ty->getPointeeType()->isFunctionType())
return DBuilder.createMemberPointerType(
getOrCreateType(Ty->getPointeeType(), U), ClassType);
+
+ const FunctionProtoType *FPT =
+ Ty->getPointeeType()->getAs<FunctionProtoType>();
return DBuilder.createMemberPointerType(getOrCreateInstanceMethodType(
- CGM.getContext().getPointerType(
- QualType(Ty->getClass(), Ty->getPointeeType().getCVRQualifiers())),
- Ty->getPointeeType()->getAs<FunctionProtoType>(), U),
- ClassType);
+ CGM.getContext().getPointerType(QualType(Ty->getClass(),
+ FPT->getTypeQuals())),
+ FPT, U), ClassType);
}
llvm::DIType CGDebugInfo::CreateType(const AtomicType *Ty,
@@ -1871,9 +1898,7 @@
// Create DIEnumerator elements for each enumerator.
SmallVector<llvm::Value *, 16> Enumerators;
ED = ED->getDefinition();
- for (EnumDecl::enumerator_iterator
- Enum = ED->enumerator_begin(), EnumEnd = ED->enumerator_end();
- Enum != EnumEnd; ++Enum) {
+ for (const auto *Enum : ED->enumerators()) {
Enumerators.push_back(
DBuilder.createEnumerator(Enum->getName(),
Enum->getInitVal().getSExtValue()));
@@ -1994,6 +2019,17 @@
return llvm::DIType(cast_or_null<llvm::MDNode>(V));
}
+void CGDebugInfo::completeTemplateDefinition(
+ const ClassTemplateSpecializationDecl &SD) {
+ if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ return;
+
+ completeClassData(&SD);
+ // In case this type has no member function definitions being emitted, ensure
+ // it is retained
+ RetainedTypes.push_back(CGM.getContext().getRecordType(&SD).getAsOpaquePtr());
+}
+
/// getCachedInterfaceTypeOrNull - Get the type from the interface
/// cache, unless it needs to regenerated. Otherwise return null.
llvm::Value *CGDebugInfo::getCachedInterfaceTypeOrNull(QualType Ty) {
@@ -2123,10 +2159,11 @@
return CreateType(cast<ComplexType>(Ty));
case Type::Pointer:
return CreateType(cast<PointerType>(Ty), Unit);
+ case Type::Adjusted:
case Type::Decayed:
- // Decayed types are just pointers in LLVM and DWARF.
+ // Decayed and adjusted types use the adjusted type in LLVM and DWARF.
return CreateType(
- cast<PointerType>(cast<DecayedType>(Ty)->getDecayedType()), Unit);
+ cast<PointerType>(cast<AdjustedType>(Ty)->getAdjustedType()), Unit);
case Type::BlockPointer:
return CreateType(cast<BlockPointerType>(Ty), Unit);
case Type::Typedef:
@@ -2233,9 +2270,10 @@
if (T && (!T.isForwardDecl() || !RD->getDefinition()))
return T;
- // If this is just a forward declaration, construct an appropriately
- // marked node and just return it.
- if (!RD->getDefinition())
+ // If this is just a forward or incomplete declaration, construct an
+ // appropriately marked node and just return it.
+ const RecordDecl *D = RD->getDefinition();
+ if (!D || !D->isCompleteDefinition())
return getOrCreateRecordFwdDecl(Ty, RDContext);
uint64_t Size = CGM.getContext().getTypeSize(Ty);
@@ -2277,7 +2315,7 @@
llvm::DICompositeType ContainingType;
const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
if (const CXXRecordDecl *PBase = RL.getPrimaryBase()) {
- // Seek non virtual primary base root.
+ // Seek non-virtual primary base root.
while (1) {
const ASTRecordLayout &BRL = CGM.getContext().getASTRecordLayout(PBase);
const CXXRecordDecl *PBT = BRL.getPrimaryBase();
@@ -2309,7 +2347,7 @@
return Ty;
}
-llvm::DIDescriptor CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
+llvm::DIScope CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
// We only need a declaration (not a definition) of the type - so use whatever
// we would otherwise do to get a type for a pointee. (forward declarations in
// limited debug info, full definitions (if the type definition is available)
@@ -2327,9 +2365,9 @@
llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator I =
DeclCache.find(D->getCanonicalDecl());
if (I == DeclCache.end())
- return llvm::DIDescriptor();
+ return llvm::DIScope();
llvm::Value *V = I->second;
- return llvm::DIDescriptor(dyn_cast_or_null<llvm::MDNode>(V));
+ return llvm::DIScope(dyn_cast_or_null<llvm::MDNode>(V));
}
/// getFunctionDeclaration - Return debug info descriptor to describe method
@@ -2352,7 +2390,6 @@
llvm::DICompositeType T(S);
llvm::DISubprogram SP =
CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()), T);
- T.addMember(SP);
return SP;
}
}
@@ -2363,9 +2400,7 @@
return SP;
}
- for (FunctionDecl::redecl_iterator I = FD->redecls_begin(),
- E = FD->redecls_end(); I != E; ++I) {
- const FunctionDecl *NextFD = *I;
+ for (auto NextFD : FD->redecls()) {
llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator
MI = SPCache.find(NextFD->getCanonicalDecl());
if (MI != SPCache.end()) {
@@ -2397,7 +2432,7 @@
SmallVector<llvm::Value *, 16> Elts;
// First element is always return type. For 'void' functions it is NULL.
- QualType ResultTy = OMethod->getResultType();
+ QualType ResultTy = OMethod->getReturnType();
// Replace the instancetype keyword with the actual type.
if (ResultTy == CGM.getContext().getObjCInstanceType())
@@ -2413,13 +2448,27 @@
llvm::DIType CmdTy = getOrCreateType(OMethod->getCmdDecl()->getType(), F);
Elts.push_back(DBuilder.createArtificialType(CmdTy));
// Get rest of the arguments.
- for (ObjCMethodDecl::param_const_iterator PI = OMethod->param_begin(),
- PE = OMethod->param_end(); PI != PE; ++PI)
- Elts.push_back(getOrCreateType((*PI)->getType(), F));
+ for (const auto *PI : OMethod->params())
+ Elts.push_back(getOrCreateType(PI->getType(), F));
llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(Elts);
return DBuilder.createSubroutineType(F, EltTypeArray);
}
+
+ // Handle variadic function types; they need an additional
+ // unspecified parameter.
+ if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
+ if (FD->isVariadic()) {
+ SmallVector<llvm::Value *, 16> EltTys;
+ EltTys.push_back(getOrCreateType(FD->getReturnType(), F));
+ if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FnType))
+ for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i)
+ EltTys.push_back(getOrCreateType(FPT->getParamType(i), F));
+ EltTys.push_back(DBuilder.createUnspecifiedParameter());
+ llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(EltTys);
+ return DBuilder.createSubroutineType(F, EltTypeArray);
+ }
+
return llvm::DICompositeType(getOrCreateType(FnType, F));
}
@@ -2434,14 +2483,25 @@
FnBeginRegionCount.push_back(LexicalBlockStack.size());
const Decl *D = GD.getDecl();
- // Function may lack declaration in source code if it is created by Clang
- // CodeGen (examples: _GLOBAL__I_a, __cxx_global_array_dtor, thunk).
+
+ // Use the location of the start of the function to determine where
+ // the function definition is located. By default use the location
+ // of the declaration as the location for the subprogram. A function
+ // may lack a declaration in the source code if it is created by code
+ // gen. (examples: _GLOBAL__I_a, __cxx_global_array_dtor, thunk).
bool HasDecl = (D != 0);
- // Use the location of the declaration.
SourceLocation Loc;
- if (HasDecl)
+ if (HasDecl) {
Loc = D->getLocation();
+ // If this is a function specialization then use the pattern body
+ // as the location for the function.
+ if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
+ if (const FunctionDecl *SpecDecl = FD->getTemplateInstantiationPattern())
+ if (SpecDecl->hasBody(SpecDecl))
+ Loc = SpecDecl->getLocation();
+ }
+
unsigned Flags = 0;
llvm::DIFile Unit = getOrCreateFile(Loc);
llvm::DIDescriptor FDContext(Unit);
@@ -2504,6 +2564,11 @@
if (!HasDecl || D->isImplicit())
Flags |= llvm::DIDescriptor::FlagArtificial;
+ // FIXME: The function declaration we're constructing here is mostly reusing
+ // declarations from CXXMethodDecl and not constructing new ones for arbitrary
+ // FunctionDecls. When/if we fix this we can have FDContext be TheCU/null for
+ // all subprograms instead of the actual context since subprogram definitions
+ // are emitted as CU level entities by the backend.
llvm::DISubprogram SP =
DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo,
getOrCreateFunctionType(D, FnType, Unit),
@@ -2514,9 +2579,10 @@
if (HasDecl)
DeclCache.insert(std::make_pair(D->getCanonicalDecl(), llvm::WeakVH(SP)));
- // Push function on region stack.
+ // Push the function onto the lexical block stack.
llvm::MDNode *SPN = SP;
LexicalBlockStack.push_back(SPN);
+
if (HasDecl)
RegionMap[D] = llvm::WeakVH(SP);
}
@@ -2560,7 +2626,8 @@
llvm::DIDescriptor(LexicalBlockStack.back()),
getOrCreateFile(CurLoc),
getLineNumber(CurLoc),
- getColumnNumber(CurLoc));
+ getColumnNumber(CurLoc),
+ 0);
llvm::MDNode *DN = D;
LexicalBlockStack.push_back(DN);
}
@@ -2768,10 +2835,7 @@
// all union fields.
const RecordDecl *RD = cast<RecordDecl>(RT->getDecl());
if (RD->isUnion() && RD->isAnonymousStructOrUnion()) {
- for (RecordDecl::field_iterator I = RD->field_begin(),
- E = RD->field_end();
- I != E; ++I) {
- FieldDecl *Field = *I;
+ for (const auto *Field : RD->fields()) {
llvm::DIType FieldTy = getOrCreateType(Field->getType(), Unit);
StringRef FieldName = Field->getName();
@@ -2970,10 +3034,7 @@
}
// Variable captures.
- for (BlockDecl::capture_const_iterator
- i = blockDecl->capture_begin(), e = blockDecl->capture_end();
- i != e; ++i) {
- const BlockDecl::Capture &capture = *i;
+ for (const auto &capture : blockDecl->captures()) {
const VarDecl *variable = capture.getVariable();
const CGBlockInfo::Capture &captureInfo = block.getCapture(variable);
@@ -3085,7 +3146,6 @@
llvm::DICompositeType Ctxt(
getContextDescriptor(cast<Decl>(D->getDeclContext())));
llvm::DIDerivedType T = CreateRecordStaticField(D, Ctxt);
- Ctxt.addMember(T);
return T;
}
@@ -3196,7 +3256,7 @@
// Emitting one decl is sufficient - debuggers can detect that this is an
// overloaded name & provide lookup for all the overloads.
const UsingShadowDecl &USD = **UD.shadow_begin();
- if (llvm::DIDescriptor Target =
+ if (llvm::DIScope Target =
getDeclarationOrDefinition(USD.getUnderlyingDecl()))
DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target,
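The llvm::tie-to-std::tie switch in the CGDebugInfo.cpp hunks above (unpacking the size/alignment pair from getTypeInfo) needs only C++11. A minimal standalone sketch follows; getTypeInfoFor() is a made-up stand-in for the real call.

    // std::tie builds a tuple of references, so assigning a pair to it
    // unpacks both members into existing variables. Compile with -std=c++11.
    #include <cstdint>
    #include <iostream>
    #include <tuple>
    #include <utility>

    static std::pair<uint64_t, unsigned> getTypeInfoFor(bool wide) {
      return wide ? std::make_pair(uint64_t(64), 8u)
                  : std::make_pair(uint64_t(32), 4u);
    }

    int main() {
      uint64_t sizeInBits = 0;
      unsigned alignInBits = 0;
      std::tie(sizeInBits, alignInBits) = getTypeInfoFor(true);
      std::cout << sizeInBits << " bits, align " << alignInBits << "\n";
      return 0;
    }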
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 0ca274f..52784da 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -20,10 +20,10 @@
#include "clang/Basic/SourceLocation.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/ValueHandle.h"
namespace llvm {
class MDNode;
@@ -47,8 +47,8 @@
/// and is responsible for emitting to llvm globals or pass directly to
/// the backend.
class CGDebugInfo {
- friend class NoLocation;
friend class ArtificialLocation;
+ friend class SaveAndRestoreLocation;
CodeGenModule &CGM;
const CodeGenOptions::DebugInfoKind DebugKind;
llvm::DIBuilder DBuilder;
@@ -288,6 +288,8 @@
void completeRequiredType(const RecordDecl *RD);
void completeClassData(const RecordDecl *RD);
+ void completeTemplateDefinition(const ClassTemplateSpecializationDecl &SD);
+
private:
/// EmitDeclare - Emit call to llvm.dbg.declare for a variable declaration.
void EmitDeclare(const VarDecl *decl, unsigned Tag, llvm::Value *AI,
@@ -342,9 +344,9 @@
llvm::DIType CreateMemberType(llvm::DIFile Unit, QualType FType,
StringRef Name, uint64_t *Offset);
- /// \brief Retrieve the DIDescriptor, if any, for the canonical form of this
+ /// \brief Retrieve the DIScope, if any, for the canonical form of this
/// declaration.
- llvm::DIDescriptor getDeclarationOrDefinition(const Decl *D);
+ llvm::DIScope getDeclarationOrDefinition(const Decl *D);
/// getFunctionDeclaration - Return debug info descriptor to describe method
/// declaration for the given method definition.
@@ -394,16 +396,26 @@
}
};
-/// NoLocation - An RAII object that temporarily disables debug
-/// locations. This is useful for emitting instructions that should be
-/// counted towards the function prologue.
-class NoLocation {
+/// SaveAndRestoreLocation - An RAII object that saves the current location
+/// and automatically restores it to the original value.
+class SaveAndRestoreLocation {
+protected:
SourceLocation SavedLoc;
CGDebugInfo *DI;
CGBuilderTy &Builder;
public:
+ SaveAndRestoreLocation(CodeGenFunction &CGF, CGBuilderTy &B);
+ /// Autorestore everything back to normal.
+ ~SaveAndRestoreLocation();
+};
+
+/// NoLocation - An RAII object that temporarily disables debug
+/// locations. This is useful for emitting instructions that should be
+/// counted towards the function prologue.
+class NoLocation : public SaveAndRestoreLocation {
+public:
NoLocation(CodeGenFunction &CGF, CGBuilderTy &B);
- /// ~NoLocation - Autorestore everything back to normal.
+ /// Autorestore everything back to normal.
~NoLocation();
};
@@ -418,10 +430,7 @@
/// This is necessary because passing an empty SourceLocation to
/// CGDebugInfo::setLocation() will result in the last valid location
/// being reused.
-class ArtificialLocation {
- SourceLocation SavedLoc;
- CGDebugInfo *DI;
- CGBuilderTy &Builder;
+class ArtificialLocation : public SaveAndRestoreLocation {
public:
ArtificialLocation(CodeGenFunction &CGF, CGBuilderTy &B);
@@ -429,7 +438,7 @@
/// (= the top of the LexicalBlockStack).
void Emit();
- /// ~ArtificialLocation - Autorestore everything back to normal.
+ /// Autorestore everything back to normal.
~ArtificialLocation();
};
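The SaveAndRestoreLocation/NoLocation/ArtificialLocation hierarchy declared above follows a plain RAII save-and-restore pattern. A minimal standalone sketch of that pattern is below; DebugState is an invented stand-in for CGDebugInfo, not the real class.

    // The base guard saves state in its constructor and restores it in its
    // destructor; a derived guard additionally clears the state for its scope.
    #include <cassert>
    #include <string>

    struct DebugState {
      std::string CurLoc;
    };

    class SaveAndRestore {
    protected:
      DebugState *DI;
      std::string SavedLoc;

    public:
      explicit SaveAndRestore(DebugState *DI) : DI(DI) {
        if (DI)
          SavedLoc = DI->CurLoc;
      }
      ~SaveAndRestore() {
        if (DI)
          DI->CurLoc = SavedLoc; // restored automatically on scope exit
      }
    };

    class NoLoc : public SaveAndRestore {
    public:
      explicit NoLoc(DebugState *DI) : SaveAndRestore(DI) {
        if (DI)
          DI->CurLoc.clear(); // temporarily disable the location
      }
    };

    int main() {
      DebugState DS{"foo.cpp:42"};
      {
        NoLoc Guard(&DS);
        assert(DS.CurLoc.empty());
      }
      assert(DS.CurLoc == "foo.cpp:42");
      return 0;
    }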
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 66d6b33..9db3c91 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -183,7 +183,7 @@
return ContextName + Separator + D.getNameAsString();
}
-llvm::GlobalVariable *
+llvm::Constant *
CodeGenFunction::CreateStaticVarDecl(const VarDecl &D,
const char *Separator,
llvm::GlobalValue::LinkageTypes Linkage) {
@@ -212,6 +212,13 @@
if (D.getTLSKind())
CGM.setTLSMode(GV, D);
+ // Make sure the result is of the correct type.
+ unsigned ExpectedAddrSpace = CGM.getContext().getTargetAddressSpace(Ty);
+ if (AddrSpace != ExpectedAddrSpace) {
+ llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace);
+ return llvm::ConstantExpr::getAddrSpaceCast(GV, PTy);
+ }
+
return GV;
}
@@ -298,12 +305,8 @@
llvm::Constant *addr =
CGM.getStaticLocalDeclAddress(&D);
- llvm::GlobalVariable *var;
- if (addr) {
- var = cast<llvm::GlobalVariable>(addr->stripPointerCasts());
- } else {
- addr = var = CreateStaticVarDecl(D, ".", Linkage);
- }
+ if (!addr)
+ addr = CreateStaticVarDecl(D, ".", Linkage);
// Store into LocalDeclMap before generating initializer to handle
// circular references.
@@ -319,6 +322,8 @@
// Save the type in case adding the initializer forces a type change.
llvm::Type *expectedType = addr->getType();
+ llvm::GlobalVariable *var =
+ cast<llvm::GlobalVariable>(addr->stripPointerCasts());
// If this value has an initializer, emit it.
if (D.getInit())
var = AddInitializerToStaticVarDecl(D, var);
@@ -332,14 +337,15 @@
var->setSection(SA->getName());
if (D.hasAttr<UsedAttr>())
- CGM.AddUsedGlobal(var);
+ CGM.addUsedGlobal(var);
// We may have to cast the constant because of the initializer
// mismatch above.
//
// FIXME: It is really dangerous to store this in the map; if anyone
// RAUW's the GV, uses of this constant will be invalid.
- llvm::Constant *castedAddr = llvm::ConstantExpr::getBitCast(var, expectedType);
+ llvm::Constant *castedAddr =
+ llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(var, expectedType);
DMEntry = castedAddr;
CGM.setStaticLocalDeclAddress(&D, castedAddr);
@@ -365,7 +371,7 @@
CodeGenFunction::Destroyer *destroyer;
bool useEHCleanupForArray;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
// Don't use an EH cleanup recursively from an EH cleanup.
bool useEHCleanupForArray =
flags.isForNormalCleanup() && this->useEHCleanupForArray;
@@ -384,7 +390,7 @@
llvm::Value *NRVOFlag;
llvm::Value *Loc;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
// Along the exceptions path we always execute the dtor.
bool NRVO = flags.isForNormalCleanup() && NRVOFlag;
@@ -410,7 +416,7 @@
struct CallStackRestore : EHScopeStack::Cleanup {
llvm::Value *Stack;
CallStackRestore(llvm::Value *Stack) : Stack(Stack) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::Value *V = CGF.Builder.CreateLoad(Stack);
llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore);
CGF.Builder.CreateCall(F, V);
@@ -421,7 +427,7 @@
const VarDecl &Var;
ExtendGCLifetime(const VarDecl *var) : Var(*var) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
// Compute the address of the local variable, in case it's a
// byref or something.
DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false,
@@ -441,7 +447,7 @@
const VarDecl *Var)
: CleanupFn(CleanupFn), FnInfo(*Info), Var(*Var) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false,
Var.getType(), VK_LValue, SourceLocation());
// Compute the address of the local variable, in case it's a byref
@@ -473,7 +479,7 @@
CallLifetimeEnd(llvm::Value *addr, llvm::Value *size)
: Addr(addr), Size(size) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::Value *castAddr = CGF.Builder.CreateBitCast(Addr, CGF.Int8PtrTy);
CGF.Builder.CreateCall2(CGF.CGM.getLLVMLifetimeEndFn(),
Size, castAddr)
@@ -530,9 +536,8 @@
return (ref->getDecl() == &var);
if (const BlockExpr *be = dyn_cast<BlockExpr>(e)) {
const BlockDecl *block = be->getBlockDecl();
- for (BlockDecl::capture_const_iterator i = block->capture_begin(),
- e = block->capture_end(); i != e; ++i) {
- if (i->getVariable() == &var)
+ for (const auto &I : block->captures()) {
+ if (I.getVariable() == &var)
return true;
}
}
@@ -571,7 +576,10 @@
EmitStoreThroughLValue(RValue::get(value), lvalue, true);
return;
}
-
+
+ if (const CXXDefaultInitExpr *DIE = dyn_cast<CXXDefaultInitExpr>(init))
+ init = DIE->getExpr();
+
// If we're emitting a value with lifetime, we have to do the
// initialization *before* we leave the cleanup scopes.
if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) {
@@ -823,7 +831,7 @@
}
/// EmitAutoVarAlloca - Emit the alloca and debug information for a
-/// local variable. Does not emit initalization or destruction.
+/// local variable. Does not emit initialization or destruction.
CodeGenFunction::AutoVarEmission
CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
QualType Ty = D.getType();
@@ -944,12 +952,12 @@
// Push a cleanup block and restore the stack there.
// FIXME: in general circumstances, this should be an EH cleanup.
- EHStack.pushCleanup<CallStackRestore>(NormalCleanup, Stack);
+ pushStackRestore(NormalCleanup, Stack);
}
llvm::Value *elementCount;
QualType elementType;
- llvm::tie(elementCount, elementType) = getVLASize(Ty);
+ std::tie(elementCount, elementType) = getVLASize(Ty);
llvm::Type *llvmTy = ConvertTypeForMem(elementType);
@@ -990,9 +998,8 @@
if (const BlockExpr *be = dyn_cast<BlockExpr>(e)) {
const BlockDecl *block = be->getBlockDecl();
- for (BlockDecl::capture_const_iterator i = block->capture_begin(),
- e = block->capture_end(); i != e; ++i) {
- if (i->getVariable() == &var)
+ for (const auto &I : block->captures()) {
+ if (I.getVariable() == &var)
return true;
}
@@ -1002,18 +1009,16 @@
if (const StmtExpr *SE = dyn_cast<StmtExpr>(e)) {
const CompoundStmt *CS = SE->getSubStmt();
- for (CompoundStmt::const_body_iterator BI = CS->body_begin(),
- BE = CS->body_end(); BI != BE; ++BI)
- if (Expr *E = dyn_cast<Expr>((*BI))) {
+ for (const auto *BI : CS->body())
+ if (const auto *E = dyn_cast<Expr>(BI)) {
if (isCapturedBy(var, E))
return true;
}
- else if (DeclStmt *DS = dyn_cast<DeclStmt>((*BI))) {
+ else if (const auto *DS = dyn_cast<DeclStmt>(BI)) {
// special case declarations
- for (DeclStmt::decl_iterator I = DS->decl_begin(), E = DS->decl_end();
- I != E; ++I) {
- if (VarDecl *VD = dyn_cast<VarDecl>((*I))) {
- Expr *Init = VD->getInit();
+ for (const auto *I : DS->decls()) {
+ if (const auto *VD = dyn_cast<VarDecl>((I))) {
+ const Expr *Init = VD->getInit();
if (Init && isCapturedBy(var, Init))
return true;
}
@@ -1344,6 +1349,10 @@
destroyer, useEHCleanupForArray);
}
+void CodeGenFunction::pushStackRestore(CleanupKind Kind, llvm::Value *SPMem) {
+ EHStack.pushCleanup<CallStackRestore>(Kind, SPMem);
+}
+
void CodeGenFunction::pushLifetimeExtendedDestroy(
CleanupKind cleanupKind, llvm::Value *addr, QualType type,
Destroyer *destroyer, bool useEHCleanupForArray) {
@@ -1505,7 +1514,7 @@
: ArrayBegin(arrayBegin), ArrayEnd(arrayEnd),
ElementType(elementType), Destroyer(destroyer) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
emitPartialArrayDestroy(CGF, ArrayBegin, ArrayEnd,
ElementType, Destroyer);
}
@@ -1527,7 +1536,7 @@
: ArrayBegin(arrayBegin), ArrayEndPointer(arrayEndPointer),
ElementType(elementType), Destroyer(destroyer) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::Value *arrayEnd = CGF.Builder.CreateLoad(ArrayEndPointer);
emitPartialArrayDestroy(CGF, ArrayBegin, arrayEnd,
ElementType, Destroyer);
@@ -1594,7 +1603,7 @@
llvm::Value *Param;
ARCPreciseLifetime_t Precise;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitARCRelease(Param, Precise);
}
};
@@ -1603,7 +1612,7 @@
/// Emit an alloca (or GlobalValue depending on target)
/// for the specified parameter and set up LocalDeclMap.
void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg,
- unsigned ArgNo) {
+ bool ArgIsPointer, unsigned ArgNo) {
// FIXME: Why isn't ImplicitParamDecl a ParmVarDecl?
assert((isa<ParmVarDecl>(D) || isa<ImplicitParamDecl>(D)) &&
"Invalid argument to EmitParmDecl");
@@ -1641,30 +1650,35 @@
}
llvm::Value *DeclPtr;
- bool HasNonScalarEvalKind = !CodeGenFunction::hasScalarEvaluationKind(Ty);
- // If this is an aggregate or variable sized value, reuse the input pointer.
- if (HasNonScalarEvalKind || !Ty->isConstantSizeType()) {
- DeclPtr = Arg;
+ bool DoStore = false;
+ bool IsScalar = hasScalarEvaluationKind(Ty);
+ CharUnits Align = getContext().getDeclAlign(&D);
+ // If we already have a pointer to the argument, reuse the input pointer.
+ if (ArgIsPointer) {
+ // If we have a prettier pointer type at this point, bitcast to that.
+ unsigned AS = cast<llvm::PointerType>(Arg->getType())->getAddressSpace();
+ llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS);
+ DeclPtr = Arg->getType() == IRTy ? Arg : Builder.CreateBitCast(Arg, IRTy,
+ D.getName());
// Push a destructor cleanup for this parameter if the ABI requires it.
- if (HasNonScalarEvalKind &&
- getTarget().getCXXABI().isArgumentDestroyedByCallee()) {
- if (const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl()) {
- if (RD->hasNonTrivialDestructor())
- pushDestroy(QualType::DK_cxx_destructor, DeclPtr, Ty);
- }
+ if (!IsScalar &&
+ getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+ const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
+ if (RD && RD->hasNonTrivialDestructor())
+ pushDestroy(QualType::DK_cxx_destructor, DeclPtr, Ty);
}
} else {
// Otherwise, create a temporary to hold the value.
llvm::AllocaInst *Alloc = CreateTempAlloca(ConvertTypeForMem(Ty),
D.getName() + ".addr");
- CharUnits Align = getContext().getDeclAlign(&D);
Alloc->setAlignment(Align.getQuantity());
DeclPtr = Alloc;
+ DoStore = true;
+ }
- bool doStore = true;
-
+ LValue lv = MakeAddrLValue(DeclPtr, Ty, Align);
+ if (IsScalar) {
Qualifiers qs = Ty.getQualifiers();
- LValue lv = MakeAddrLValue(DeclPtr, Ty, Align);
if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
// We honor __attribute__((ns_consumed)) for types with lifetime.
// For __strong, it's handled by just skipping the initial retain;
@@ -1693,7 +1707,7 @@
llvm::Value *Null = CGM.EmitNullConstant(D.getType());
EmitStoreOfScalar(Null, lv, /* isInitialization */ true);
EmitARCStoreStrongCall(lv.getAddress(), Arg, true);
- doStore = false;
+ DoStore = false;
}
else
// Don't use objc_retainBlock for block pointers, because we
@@ -1712,19 +1726,19 @@
if (lt == Qualifiers::OCL_Weak) {
EmitARCInitWeak(DeclPtr, Arg);
- doStore = false; // The weak init is a store, no need to do two.
+ DoStore = false; // The weak init is a store, no need to do two.
}
}
// Enter the cleanup scope.
EmitAutoVarWithLifetime(*this, D, DeclPtr, lt);
}
-
- // Store the initial value into the alloca.
- if (doStore)
- EmitStoreOfScalar(Arg, lv, /* isInitialization */ true);
}
+ // Store the initial value into the alloca.
+ if (DoStore)
+ EmitStoreOfScalar(Arg, lv, /* isInitialization */ true);
+
llvm::Value *&DMEntry = LocalDeclMap[&D];
assert(DMEntry == 0 && "Decl already exists in localdeclmap!");
DMEntry = DeclPtr;
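The repeated addition of the override specifier to Emit(CodeGenFunction&, Flags) in the cleanup structs above is plain C++11. A minimal standalone sketch follows, with Cleanup and Flags as simplified stand-ins for EHScopeStack::Cleanup.

    // `override` makes the compiler verify that Emit really overrides the
    // base-class virtual; a signature mismatch becomes a compile error rather
    // than silently declaring an unrelated function. Compile with -std=c++11.
    #include <iostream>

    struct Flags {
      bool ForEH;
    };

    struct Cleanup {
      virtual ~Cleanup() {}
      virtual void Emit(Flags F) = 0;
    };

    struct CallStackRestore : Cleanup {
      void Emit(Flags F) override {
        std::cout << (F.ForEH ? "EH cleanup\n" : "normal cleanup\n");
      }
    };

    int main() {
      CallStackRestore C;
      C.Emit(Flags{false});
      C.Emit(Flags{true});
      return 0;
    }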
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index 7bdb9eb..bc8620d 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -281,9 +281,8 @@
// FIXME: We only need to register one __cxa_thread_atexit function for the
// entire TU.
CXXThreadLocalInits.push_back(Fn);
- } else if (D->hasAttr<InitPriorityAttr>()) {
- unsigned int order = D->getAttr<InitPriorityAttr>()->getPriority();
- OrderGlobalInits Key(order, PrioritizedCXXGlobalInits.size());
+ } else if (const InitPriorityAttr *IPA = D->getAttr<InitPriorityAttr>()) {
+ OrderGlobalInits Key(IPA->getPriority(), PrioritizedCXXGlobalInits.size());
PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn));
DelayedCXXInitPosition.erase(D);
} else if (D->getTemplateSpecializationKind() != TSK_ExplicitSpecialization &&
@@ -503,11 +502,9 @@
FunctionArgList args;
ImplicitParamDecl dst(0, SourceLocation(), 0, getContext().VoidPtrTy);
args.push_back(&dst);
-
- const CGFunctionInfo &FI =
- CGM.getTypes().arrangeFunctionDeclaration(getContext().VoidTy, args,
- FunctionType::ExtInfo(),
- /*variadic*/ false);
+
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+ getContext().VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false);
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *fn =
CreateGlobalInitOrDestructFunction(CGM, FTy, "__cxx_global_array_dtor");
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index 39a992a..ca31717 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -17,8 +17,8 @@
#include "TargetInfo.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/CallSite.h"
using namespace clang;
using namespace CodeGen;
@@ -263,12 +263,9 @@
/// Check whether a personality function could reasonably be swapped
/// for a C++ personality function.
static bool PersonalityHasOnlyCXXUses(llvm::Constant *Fn) {
- for (llvm::Constant::use_iterator
- I = Fn->use_begin(), E = Fn->use_end(); I != E; ++I) {
- llvm::User *User = *I;
-
+ for (llvm::User *U : Fn->users()) {
// Conditionally white-list bitcasts.
- if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(User)) {
+ if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(U)) {
if (CE->getOpcode() != llvm::Instruction::BitCast) return false;
if (!PersonalityHasOnlyCXXUses(CE))
return false;
@@ -276,7 +273,7 @@
}
// Otherwise, it has to be a landingpad instruction.
- llvm::LandingPadInst *LPI = dyn_cast<llvm::LandingPadInst>(User);
+ llvm::LandingPadInst *LPI = dyn_cast<llvm::LandingPadInst>(U);
if (!LPI) return false;
for (unsigned I = 0, E = LPI->getNumClauses(); I != E; ++I) {
@@ -363,7 +360,7 @@
struct FreeException : EHScopeStack::Cleanup {
llvm::Value *exn;
FreeException(llvm::Value *exn) : exn(exn) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitNounwindRuntimeCall(getFreeExceptionFn(CGF.CGM), exn);
}
};
@@ -766,11 +763,9 @@
// Save the current IR generation state.
CGBuilderTy::InsertPoint savedIP = Builder.saveAndClearIP();
- SourceLocation SavedLocation;
- if (CGDebugInfo *DI = getDebugInfo()) {
- SavedLocation = DI->getLocation();
+ SaveAndRestoreLocation AutoRestoreLocation(*this, Builder);
+ if (CGDebugInfo *DI = getDebugInfo())
DI->EmitLocation(Builder, CurEHLocation);
- }
const EHPersonality &personality = EHPersonality::get(getLangOpts());
@@ -892,8 +887,6 @@
// Restore the old IR generation state.
Builder.restoreIP(savedIP);
- if (CGDebugInfo *DI = getDebugInfo())
- DI->EmitLocation(Builder, SavedLocation);
return lpad;
}
@@ -915,7 +908,7 @@
CallEndCatch(bool MightThrow) : MightThrow(MightThrow) {}
bool MightThrow;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
if (!MightThrow) {
CGF.EmitNounwindRuntimeCall(getEndCatchFn(CGF.CGM));
return;
@@ -1244,6 +1237,7 @@
// If the catch was not required, bail out now.
if (!CatchScope.hasEHBranches()) {
+ CatchScope.clearHandlerBlocks();
EHStack.popCatch();
return;
}
@@ -1294,6 +1288,10 @@
// Initialize the catch variable and set up the cleanups.
BeginCatch(*this, C);
+ // Emit the PGO counter increment.
+ RegionCounter CatchCnt = getPGORegionCounter(C);
+ CatchCnt.beginRegion(Builder);
+
// Perform the body of the catch.
EmitStmt(C->getHandlerBlock());
@@ -1320,7 +1318,9 @@
Builder.CreateBr(ContBB);
}
+ RegionCounter ContCnt = getPGORegionCounter(&S);
EmitBlock(ContBB);
+ ContCnt.beginRegion(Builder);
}
namespace {
@@ -1330,7 +1330,7 @@
CallEndCatchForFinally(llvm::Value *ForEHVar, llvm::Value *EndCatchFn)
: ForEHVar(ForEHVar), EndCatchFn(EndCatchFn) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::BasicBlock *EndCatchBB = CGF.createBasicBlock("finally.endcatch");
llvm::BasicBlock *CleanupContBB =
CGF.createBasicBlock("finally.cleanup.cont");
@@ -1357,7 +1357,7 @@
: Body(Body), ForEHVar(ForEHVar), EndCatchFn(EndCatchFn),
RethrowFn(RethrowFn), SavedExnVar(SavedExnVar) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
// Enter a cleanup to call the end-catch function if one was provided.
if (EndCatchFn)
CGF.EHStack.pushCleanup<CallEndCatchForFinally>(NormalAndEHCleanup,
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index cb990b2..1bdd094 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -85,6 +85,7 @@
/// EvaluateExprAsBool - Perform the usual unary conversions on the specified
/// expression and compare the result against zero, returning an Int1Ty value.
llvm::Value *CodeGenFunction::EvaluateExprAsBool(const Expr *E) {
+ PGO.setCurrentStmt(E);
if (const MemberPointerType *MPT = E->getType()->getAs<MemberPointerType>()) {
llvm::Value *MemPtr = EmitScalarExpr(E);
return CGM.getCXXABI().EmitMemberPointerIsNotNull(*this, MemPtr, MPT);
@@ -389,7 +390,7 @@
case SubobjectAdjustment::MemberPointerAdjustment: {
llvm::Value *Ptr = EmitScalarExpr(Adjustment.Ptr.RHS);
Object = CGM.getCXXABI().EmitMemberDataPointerAddress(
- *this, Object, Ptr, Adjustment.Ptr.MPT);
+ *this, E, Object, Ptr, Adjustment.Ptr.MPT);
break;
}
}
@@ -1690,11 +1691,16 @@
static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
const Expr *E, const VarDecl *VD) {
+ QualType T = E->getType();
+
+ // If it's thread_local, emit a call to its wrapper function instead.
+ if (VD->getTLSKind() == VarDecl::TLS_Dynamic)
+ return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T);
+
llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD);
llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType());
V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
- QualType T = E->getType();
LValue LV;
if (VD->getType()->isReferenceType()) {
llvm::LoadInst *LI = CGF.Builder.CreateLoad(V);
@@ -1702,7 +1708,7 @@
V = LI;
LV = CGF.MakeNaturalAlignAddrLValue(V, T);
} else {
- LV = CGF.MakeAddrLValue(V, E->getType(), Alignment);
+ LV = CGF.MakeAddrLValue(V, T, Alignment);
}
setObjCGCLValueClass(CGF.getContext(), E, LV);
return LV;
@@ -1718,7 +1724,7 @@
// isn't the same as the type of a use. Correct for this with a
// bitcast.
QualType NoProtoType =
- CGF.getContext().getFunctionNoProtoType(Proto->getResultType());
+ CGF.getContext().getFunctionNoProtoType(Proto->getReturnType());
NoProtoType = CGF.getContext().getPointerType(NoProtoType);
V = CGF.Builder.CreateBitCast(V, CGF.ConvertType(NoProtoType));
}
@@ -1769,12 +1775,8 @@
if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) {
// Check if this is a global variable.
- if (VD->hasLinkage() || VD->isStaticDataMember()) {
- // If it's thread_local, emit a call to its wrapper function instead.
- if (VD->getTLSKind() == VarDecl::TLS_Dynamic)
- return CGM.getCXXABI().EmitThreadLocalDeclRefExpr(*this, E);
+ if (VD->hasLinkage() || VD->isStaticDataMember())
return EmitGlobalVarDeclLValue(*this, E, VD);
- }
bool isBlockVariable = VD->hasAttr<BlocksAttr>();
@@ -2651,6 +2653,7 @@
}
OpaqueValueMapping binding(*this, expr);
+ RegionCounter Cnt = getPGORegionCounter(expr);
const Expr *condExpr = expr->getCond();
bool CondExprBool;
@@ -2658,8 +2661,12 @@
const Expr *live = expr->getTrueExpr(), *dead = expr->getFalseExpr();
if (!CondExprBool) std::swap(live, dead);
- if (!ContainsLabel(dead))
+ if (!ContainsLabel(dead)) {
+ // If the true case is live, we need to track its region.
+ if (CondExprBool)
+ Cnt.beginRegion(Builder);
return EmitLValue(live);
+ }
}
llvm::BasicBlock *lhsBlock = createBasicBlock("cond.true");
@@ -2667,10 +2674,11 @@
llvm::BasicBlock *contBlock = createBasicBlock("cond.end");
ConditionalEvaluation eval(*this);
- EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock);
+ EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, Cnt.getCount());
// Any temporaries created here are conditional.
EmitBlock(lhsBlock);
+ Cnt.beginRegion(Builder);
eval.begin(*this);
LValue lhs = EmitLValue(expr->getTrueExpr());
eval.end(*this);
@@ -2744,6 +2752,7 @@
case CK_ARCReclaimReturnedObject:
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
+ case CK_AddressSpaceConversion:
return EmitUnsupportedLValue(E, "unexpected cast lvalue");
case CK_Dependent:
@@ -3061,7 +3070,7 @@
if (!RV.isScalar())
return MakeAddrLValue(RV.getAggregateAddr(), E->getType());
- assert(E->getMethodDecl()->getResultType()->isReferenceType() &&
+ assert(E->getMethodDecl()->getReturnType()->isReferenceType() &&
"Can't have a scalar return unless the return type is a "
"reference type!");
@@ -3231,8 +3240,8 @@
const MemberPointerType *MPT
= E->getRHS()->getType()->getAs<MemberPointerType>();
- llvm::Value *AddV =
- CGM.getCXXABI().EmitMemberDataPointerAddress(*this, BaseV, OffsetV, MPT);
+ llvm::Value *AddV = CGM.getCXXABI().EmitMemberDataPointerAddress(
+ *this, E, BaseV, OffsetV, MPT);
return MakeAddrLValue(AddV, MPT->getPointeeType());
}
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 9d0f3a9..6c50521 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -713,6 +713,7 @@
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
case CK_ZeroToOCLEvent:
+ case CK_AddressSpaceConversion:
llvm_unreachable("cast kind invalid for aggregate types");
}
}
@@ -891,14 +892,16 @@
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
CodeGenFunction::ConditionalEvaluation eval(CGF);
- CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
+ CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
// Save whether the destination's lifetime is externally managed.
bool isExternallyDestructed = Dest.isExternallyDestructed();
eval.begin(CGF);
CGF.EmitBlock(LHSBlock);
+ Cnt.beginRegion(Builder);
Visit(E->getTrueExpr());
eval.end(CGF);
@@ -928,7 +931,11 @@
llvm::Value *ArgPtr = CGF.EmitVAArg(ArgValue, VE->getType());
if (!ArgPtr) {
- CGF.ErrorUnsupported(VE, "aggregate va_arg expression");
+ // If EmitVAArg fails, we fall back to the LLVM instruction.
+ llvm::Value *Val =
+ Builder.CreateVAArg(ArgValue, CGF.ConvertType(VE->getType()));
+ if (!Dest.isIgnored())
+ Builder.CreateStore(Val, Dest.getAddr());
return;
}
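The hunk above stops rejecting aggregate va_arg expressions and instead falls back to LLVM's native va_arg instruction when the target-specific EmitVAArg gives up. A hedged source-level example of the kind of code that now reaches this path (the struct and values are illustrative, not from the patch):

    #include <cstdarg>

    struct Point { int x, y; };   // a small POD aggregate

    static int lastSum(int count, ...) {
      va_list ap;
      va_start(ap, count);
      Point p = {0, 0};
      for (int i = 0; i < count; ++i)
        p = va_arg(ap, Point);    // aggregate-typed va_arg
      va_end(ap);
      return p.x + p.y;
    }

    int main() {
      Point a = {1, 2}, b = {3, 4};
      return lastSum(2, a, b) == 7 ? 0 : 1;
    }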
@@ -1134,9 +1141,7 @@
#ifndef NDEBUG
// Make sure that it's really an empty and not a failure of
// semantic analysis.
- for (RecordDecl::field_iterator Field = record->field_begin(),
- FieldEnd = record->field_end();
- Field != FieldEnd; ++Field)
+ for (const auto *Field : record->fields())
assert(Field->isUnnamedBitfield() && "Only unnamed bitfields allowed");
#endif
return;
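This and many later hunks replace explicit RecordDecl::field_iterator loops with C++11 range-based for over the new fields()/bases()/params() ranges. A minimal sketch of the rewrite pattern, with a toy container standing in for the AST ranges:

    #include <cassert>
    #include <vector>

    struct Field { bool unnamedBitfield; };

    int main() {
      std::vector<Field> fields(3, Field{true});
      // Old style: explicit begin()/end() iterators.
      for (std::vector<Field>::const_iterator I = fields.begin(), E = fields.end();
           I != E; ++I)
        assert(I->unnamedBitfield && "Only unnamed bitfields allowed");
      // New style, as in the patch: range-based for.
      for (const auto &F : fields)
        assert(F.unnamedBitfield && "Only unnamed bitfields allowed");
      return 0;
    }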
@@ -1165,9 +1170,7 @@
// Here we iterate over the fields; this makes it simpler to both
// default-initialize fields and skip over unnamed fields.
unsigned curInitIndex = 0;
- for (RecordDecl::field_iterator field = record->field_begin(),
- fieldEnd = record->field_end();
- field != fieldEnd; ++field) {
+ for (const auto *field : record->fields()) {
// We're done once we hit the flexible array member.
if (field->getType()->isIncompleteArrayType())
break;
@@ -1184,7 +1187,7 @@
break;
- LValue LV = CGF.EmitLValueForFieldInitialization(DestLV, *field);
+ LValue LV = CGF.EmitLValueForFieldInitialization(DestLV, field);
// We never generate write-barriers for initialized fields.
LV.setNonGC(true);
@@ -1261,8 +1264,7 @@
CharUnits NumNonZeroBytes = CharUnits::Zero();
unsigned ILEElement = 0;
- for (RecordDecl::field_iterator Field = SD->field_begin(),
- FieldEnd = SD->field_end(); Field != FieldEnd; ++Field) {
+ for (const auto *Field : SD->fields()) {
// We're done once we hit the flexible array member or run out of
// InitListExpr elements.
if (Field->getType()->isIncompleteArrayType() ||
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index cc7b24d..f71a3de 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -18,8 +18,8 @@
#include "CGObjCRuntime.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/CallSite.h"
using namespace clang;
using namespace CodeGen;
@@ -119,8 +119,8 @@
// type of MD and has a prefix.
// For now we just avoid devirtualizing these covariant cases.
if (DevirtualizedMethod &&
- DevirtualizedMethod->getResultType().getCanonicalType() !=
- MD->getResultType().getCanonicalType())
+ DevirtualizedMethod->getReturnType().getCanonicalType() !=
+ MD->getReturnType().getCanonicalType())
DevirtualizedMethod = NULL;
}
@@ -220,8 +220,10 @@
}
}
- if (MD->isVirtual())
- This = CGM.getCXXABI().adjustThisArgumentForVirtualCall(*this, MD, This);
+ if (MD->isVirtual()) {
+ This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall(
+ *this, MD, This, UseVirtualCall);
+ }
return EmitCXXMemberCall(MD, CE->getExprLoc(), Callee, ReturnValue, This,
/*ImplicitParam=*/0, QualType(),
@@ -260,7 +262,7 @@
// Ask the ABI to load the callee. Note that This is modified.
llvm::Value *Callee =
- CGM.getCXXABI().EmitLoadOfMemberFunctionPointer(*this, This, MemFnPtr, MPT);
+ CGM.getCXXABI().EmitLoadOfMemberFunctionPointer(*this, BO, This, MemFnPtr, MPT);
CallArgList Args;
@@ -316,7 +318,7 @@
const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(Base);
CharUnits Size = Layout.getNonVirtualSize();
- CharUnits Align = Layout.getNonVirtualAlign();
+ CharUnits Align = Layout.getNonVirtualAlignment();
llvm::Value *SizeVal = CGF.CGM.getSize(Size);
@@ -864,9 +866,29 @@
cleanupDominator->eraseFromParent();
}
- // Advance to the next element.
- llvm::Value *nextPtr = Builder.CreateConstGEP1_32(curPtr, 1, "array.next");
+ // FIXME: The code below intends to initialize the individual array base
+ // elements, one at a time - but when dealing with multi-dimensional arrays -
+ // the pointer arithmetic can get confused - so the fix below entails casting
+ // to the allocated type to ensure that we get the pointer arithmetic right.
+ // It seems like the right approach here is to really initialize the
+ // individual array base elements one at a time since it'll generate less
+ // code. I think the problem is that the wrong type is being passed into
+ // StoreAnyExprIntoOneUnit, but directly fixing that doesn't really work,
+ // because the Init expression has the wrong type at this point.
+ // So... this is ok for a quick fix, but we can and should do a lot better
+ // here long-term.
+ // Advance to the next element by adjusting the pointer type as necessary.
+ // For new int[10][20][30], alloc type is int[20][30], base type is 'int'.
+ QualType AllocType = E->getAllocatedType();
+ llvm::Type *AllocPtrTy = ConvertTypeForMem(AllocType)->getPointerTo(
+ curPtr->getType()->getPointerAddressSpace());
+ llvm::Value *curPtrAllocTy = Builder.CreateBitCast(curPtr, AllocPtrTy);
+ llvm::Value *nextPtrAllocTy =
+ Builder.CreateConstGEP1_32(curPtrAllocTy, 1, "array.next");
+ // Cast it back to the base type so that we can compare it to the endPtr.
+ llvm::Value *nextPtr =
+ Builder.CreateBitCast(nextPtrAllocTy, endPtr->getType());
// Check whether we've gotten to the end of the array and, if so,
// exit the loop.
llvm::Value *isEnd = Builder.CreateICmpEQ(nextPtr, endPtr, "array.atend");
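The FIXME above hinges on the distinction between the allocated type and the array base element type for a multi-dimensional new-expression. A small stand-alone C++ illustration of that distinction (not part of the patch):

    #include <type_traits>

    int main() {
      // For `new int[10][20][30]`, the allocated type is int[20][30]...
      int (*p)[20][30] = new int[10][20][30];
      // ...while the array *base* element type is plain int.
      static_assert(
          std::is_same<std::remove_all_extents<int[20][30]>::type, int>::value,
          "base element type is int");
      // Stepping one allocated element advances by sizeof(int[20][30]) bytes,
      // which is what the bitcast-to-AllocPtrTy plus GEP in the hunk achieves.
      int (*next)[20][30] = p + 1;
      bool ok = reinterpret_cast<char *>(next) - reinterpret_cast<char *>(p) ==
                static_cast<long>(sizeof(int[20][30]));
      delete[] p;
      return ok ? 0 : 1;
    }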
@@ -991,20 +1013,20 @@
getPlacementArgs()[I] = Arg;
}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
const FunctionProtoType *FPT
= OperatorDelete->getType()->getAs<FunctionProtoType>();
- assert(FPT->getNumArgs() == NumPlacementArgs + 1 ||
- (FPT->getNumArgs() == 2 && NumPlacementArgs == 0));
+ assert(FPT->getNumParams() == NumPlacementArgs + 1 ||
+ (FPT->getNumParams() == 2 && NumPlacementArgs == 0));
CallArgList DeleteArgs;
// The first argument is always a void*.
- FunctionProtoType::arg_type_iterator AI = FPT->arg_type_begin();
+ FunctionProtoType::param_type_iterator AI = FPT->param_type_begin();
DeleteArgs.add(RValue::get(Ptr), *AI++);
// A member 'operator delete' can take an extra 'size_t' argument.
- if (FPT->getNumArgs() == NumPlacementArgs + 2)
+ if (FPT->getNumParams() == NumPlacementArgs + 2)
DeleteArgs.add(RValue::get(AllocSize), *AI++);
// Pass the rest of the arguments, which must match exactly.
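The Emit(...) cleanups in this file (and in CGObjC*.cpp below) gain the C++11 override specifier. A minimal sketch of what that buys; the names here are illustrative, not the Clang hierarchy:

    struct Cleanup {
      virtual ~Cleanup() {}
      virtual void Emit(int flags) = 0;
    };

    struct CallPlacementDelete : Cleanup {
      // `override` makes the compiler reject this method if the base signature
      // ever changes, instead of silently introducing a new, unrelated virtual.
      void Emit(int flags) override { (void)flags; }
    };

    int main() {
      CallPlacementDelete C;
      Cleanup &Base = C;
      Base.Emit(0);
      return 0;
    }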
@@ -1046,20 +1068,20 @@
getPlacementArgs()[I] = Arg;
}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
const FunctionProtoType *FPT
= OperatorDelete->getType()->getAs<FunctionProtoType>();
- assert(FPT->getNumArgs() == NumPlacementArgs + 1 ||
- (FPT->getNumArgs() == 2 && NumPlacementArgs == 0));
+ assert(FPT->getNumParams() == NumPlacementArgs + 1 ||
+ (FPT->getNumParams() == 2 && NumPlacementArgs == 0));
CallArgList DeleteArgs;
// The first argument is always a void*.
- FunctionProtoType::arg_type_iterator AI = FPT->arg_type_begin();
+ FunctionProtoType::param_type_iterator AI = FPT->param_type_begin();
DeleteArgs.add(Ptr.restore(CGF), *AI++);
// A member 'operator delete' can take an extra 'size_t' argument.
- if (FPT->getNumArgs() == NumPlacementArgs + 2) {
+ if (FPT->getNumParams() == NumPlacementArgs + 2) {
RValue RV = AllocSize.restore(CGF);
DeleteArgs.add(RV, *AI++);
}
@@ -1145,35 +1167,12 @@
allocatorArgs.add(RValue::get(allocSize), sizeType);
- // Emit the rest of the arguments.
- // FIXME: Ideally, this should just use EmitCallArgs.
- CXXNewExpr::const_arg_iterator placementArg = E->placement_arg_begin();
-
- // First, use the types from the function type.
// We start at 1 here because the first argument (the allocation size)
// has already been emitted.
- for (unsigned i = 1, e = allocatorType->getNumArgs(); i != e;
- ++i, ++placementArg) {
- QualType argType = allocatorType->getArgType(i);
-
- assert(getContext().hasSameUnqualifiedType(argType.getNonReferenceType(),
- placementArg->getType()) &&
- "type mismatch in call argument!");
-
- EmitCallArg(allocatorArgs, *placementArg, argType);
- }
-
- // Either we've emitted all the call args, or we have a call to a
- // variadic function.
- assert((placementArg == E->placement_arg_end() ||
- allocatorType->isVariadic()) &&
- "Extra arguments to non-variadic function!");
-
- // If we still have any arguments, emit them using the type of the argument.
- for (CXXNewExpr::const_arg_iterator placementArgsEnd = E->placement_arg_end();
- placementArg != placementArgsEnd; ++placementArg) {
- EmitCallArg(allocatorArgs, *placementArg, placementArg->getType());
- }
+ EmitCallArgs(allocatorArgs, allocatorType->isVariadic(),
+ allocatorType->param_type_begin() + 1,
+ allocatorType->param_type_end(), E->placement_arg_begin(),
+ E->placement_arg_end());
// Emit the allocation call. If the allocator is a global placement
// operator, just "inline" it directly.
@@ -1289,14 +1288,14 @@
// Check if we need to pass the size to the delete operator.
llvm::Value *Size = 0;
QualType SizeTy;
- if (DeleteFTy->getNumArgs() == 2) {
- SizeTy = DeleteFTy->getArgType(1);
+ if (DeleteFTy->getNumParams() == 2) {
+ SizeTy = DeleteFTy->getParamType(1);
CharUnits DeleteTypeSize = getContext().getTypeSizeInChars(DeleteTy);
Size = llvm::ConstantInt::get(ConvertType(SizeTy),
DeleteTypeSize.getQuantity());
}
-
- QualType ArgTy = DeleteFTy->getArgType(0);
+
+ QualType ArgTy = DeleteFTy->getParamType(0);
llvm::Value *DeletePtr = Builder.CreateBitCast(Ptr, ConvertType(ArgTy));
DeleteArgs.add(RValue::get(DeletePtr), ArgTy);
@@ -1319,7 +1318,7 @@
QualType ElementType)
: Ptr(Ptr), OperatorDelete(OperatorDelete), ElementType(ElementType) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitDeleteCall(OperatorDelete, Ptr, ElementType);
}
};
@@ -1422,22 +1421,22 @@
: Ptr(Ptr), OperatorDelete(OperatorDelete), NumElements(NumElements),
ElementType(ElementType), CookieSize(CookieSize) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
const FunctionProtoType *DeleteFTy =
OperatorDelete->getType()->getAs<FunctionProtoType>();
- assert(DeleteFTy->getNumArgs() == 1 || DeleteFTy->getNumArgs() == 2);
+ assert(DeleteFTy->getNumParams() == 1 || DeleteFTy->getNumParams() == 2);
CallArgList Args;
// Pass the pointer as the first argument.
- QualType VoidPtrTy = DeleteFTy->getArgType(0);
+ QualType VoidPtrTy = DeleteFTy->getParamType(0);
llvm::Value *DeletePtr
= CGF.Builder.CreateBitCast(Ptr, CGF.ConvertType(VoidPtrTy));
Args.add(RValue::get(DeletePtr), VoidPtrTy);
// Pass the original requested size as the second argument.
- if (DeleteFTy->getNumArgs() == 2) {
- QualType size_t = DeleteFTy->getArgType(1);
+ if (DeleteFTy->getNumParams() == 2) {
+ QualType size_t = DeleteFTy->getParamType(1);
llvm::IntegerType *SizeTy
= cast<llvm::IntegerType>(CGF.ConvertType(size_t));
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index 73d5bcb..1f84c86 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -93,7 +93,7 @@
ComplexPairTy Visit(Expr *E) {
return StmtVisitor<ComplexExprEmitter, ComplexPairTy>::Visit(E);
}
-
+
ComplexPairTy VisitStmt(Stmt *S) {
S->dump(CGF.getContext().getSourceManager());
llvm_unreachable("Stmt can't have complex result type!");
@@ -410,7 +410,7 @@
return ComplexPairTy(Val, llvm::Constant::getNullValue(Val->getType()));
}
-ComplexPairTy ComplexExprEmitter::EmitCast(CastExpr::CastKind CK, Expr *Op,
+ComplexPairTy ComplexExprEmitter::EmitCast(CastExpr::CastKind CK, Expr *Op,
QualType DestTy) {
switch (CK) {
case CK_Dependent: llvm_unreachable("dependent cast kind in IR gen!");
@@ -427,7 +427,7 @@
case CK_LValueBitCast: {
LValue origLV = CGF.EmitLValue(Op);
llvm::Value *V = origLV.getAddress();
- V = Builder.CreateBitCast(V,
+ V = Builder.CreateBitCast(V,
CGF.ConvertType(CGF.getContext().getPointerType(DestTy)));
return EmitLoadOfLValue(CGF.MakeAddrLValue(V, DestTy,
origLV.getAlignment()),
@@ -475,6 +475,7 @@
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
case CK_ZeroToOCLEvent:
+ case CK_AddressSpaceConversion:
llvm_unreachable("invalid cast kind for complex value");
case CK_FloatingRealToComplex:
@@ -652,7 +653,7 @@
assert(CGF.getContext().hasSameUnqualifiedType(OpInfo.Ty,
E->getRHS()->getType()));
OpInfo.RHS = Visit(E->getRHS());
-
+
LValue LHS = CGF.EmitLValue(E->getLHS());
// Load from the l-value and convert it.
@@ -702,7 +703,7 @@
LValue ComplexExprEmitter::EmitBinAssignLValue(const BinaryOperator *E,
ComplexPairTy &Val) {
- assert(CGF.getContext().hasSameUnqualifiedType(E->getLHS()->getType(),
+ assert(CGF.getContext().hasSameUnqualifiedType(E->getLHS()->getType(),
E->getRHS()->getType()) &&
"Invalid assignment");
TestAndClearIgnoreReal();
@@ -751,11 +752,13 @@
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
CodeGenFunction::ConditionalEvaluation eval(CGF);
- CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
+ CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
eval.begin(CGF);
CGF.EmitBlock(LHSBlock);
+ Cnt.beginRegion(Builder);
ComplexPairTy LHS = Visit(E->getTrueExpr());
LHSBlock = Builder.GetInsertBlock();
CGF.EmitBranch(ContBlock);
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index f4d6861..82382dd 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -633,6 +633,9 @@
return llvm::ConstantStruct::get(STy, Elts);
}
+ case CK_AddressSpaceConversion:
+ return llvm::ConstantExpr::getAddrSpaceCast(C, destType);
+
case CK_LValueToRValue:
case CK_AtomicToNonAtomic:
case CK_NonAtomicToAtomic:
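The new CK_AddressSpaceConversion case folds the conversion directly into a constant expression. A hedged sketch against the LLVM 3.5-era C++ API (the address-space numbers are illustrative, not taken from the patch):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    int main() {
      llvm::LLVMContext Ctx;
      llvm::Type *I8 = llvm::Type::getInt8Ty(Ctx);
      // A null i8* in address space 0, converted to i8 addrspace(3)*.
      llvm::Constant *C =
          llvm::ConstantPointerNull::get(llvm::PointerType::get(I8, 0));
      llvm::Type *DestTy = llvm::PointerType::get(I8, 3);
      llvm::Constant *Cast = llvm::ConstantExpr::getAddrSpaceCast(C, DestTy);
      return Cast ? 0 : 1;
    }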
@@ -917,7 +920,7 @@
}
case Expr::CallExprClass: {
CallExpr* CE = cast<CallExpr>(E);
- unsigned builtin = CE->isBuiltinCall();
+ unsigned builtin = CE->getBuiltinCallee();
if (builtin !=
Builtin::BI__builtin___CFStringMakeConstantString &&
builtin !=
@@ -1062,13 +1065,13 @@
if (!Offset->isNullValue()) {
llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, Int8PtrTy);
Casted = llvm::ConstantExpr::getGetElementPtr(Casted, Offset);
- C = llvm::ConstantExpr::getBitCast(Casted, C->getType());
+ C = llvm::ConstantExpr::getPointerCast(Casted, C->getType());
}
// Convert to the appropriate type; this could be an lvalue for
// an integer.
if (isa<llvm::PointerType>(DestTy))
- return llvm::ConstantExpr::getBitCast(C, DestTy);
+ return llvm::ConstantExpr::getPointerCast(C, DestTy);
return llvm::ConstantExpr::getPtrToInt(C, DestTy);
} else {
@@ -1265,15 +1268,14 @@
const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
// Go through all bases and fill in any null pointer to data members.
- for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(),
- E = RD->bases_end(); I != E; ++I) {
- if (I->isVirtual()) {
+ for (const auto &I : RD->bases()) {
+ if (I.isVirtual()) {
// Ignore virtual bases.
continue;
}
const CXXRecordDecl *BaseDecl =
- cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
// Ignore empty bases.
if (BaseDecl->isEmpty())
@@ -1285,7 +1287,7 @@
uint64_t BaseOffset =
CGM.getContext().toBits(Layout.getBaseClassOffset(BaseDecl));
- FillInNullDataMemberPointers(CGM, I->getType(),
+ FillInNullDataMemberPointers(CGM, I.getType(),
Elements, StartOffset + BaseOffset);
}
@@ -1335,16 +1337,15 @@
std::vector<llvm::Constant *> elements(numElements);
// Fill in all the bases.
- for (CXXRecordDecl::base_class_const_iterator
- I = record->bases_begin(), E = record->bases_end(); I != E; ++I) {
- if (I->isVirtual()) {
+ for (const auto &I : record->bases()) {
+ if (I.isVirtual()) {
// Ignore virtual bases; if we're laying out for a complete
// object, we'll lay these out later.
continue;
}
const CXXRecordDecl *base =
- cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
// Ignore empty bases.
if (base->isEmpty())
@@ -1356,28 +1357,24 @@
}
// Fill in all the fields.
- for (RecordDecl::field_iterator I = record->field_begin(),
- E = record->field_end(); I != E; ++I) {
- const FieldDecl *field = *I;
-
+ for (const auto *Field : record->fields()) {
// Fill in non-bitfields. (Bitfields always use a zero pattern, which we
// will fill in later.)
- if (!field->isBitField()) {
- unsigned fieldIndex = layout.getLLVMFieldNo(field);
- elements[fieldIndex] = CGM.EmitNullConstant(field->getType());
+ if (!Field->isBitField()) {
+ unsigned fieldIndex = layout.getLLVMFieldNo(Field);
+ elements[fieldIndex] = CGM.EmitNullConstant(Field->getType());
}
// For unions, stop after the first named field.
- if (record->isUnion() && field->getDeclName())
+ if (record->isUnion() && Field->getDeclName())
break;
}
// Fill in the virtual bases, if we're working with the complete object.
if (asCompleteObject) {
- for (CXXRecordDecl::base_class_const_iterator
- I = record->vbases_begin(), E = record->vbases_end(); I != E; ++I) {
+ for (const auto &I : record->vbases()) {
const CXXRecordDecl *base =
- cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
// Ignore empty bases.
if (base->isEmpty())
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index f3a5387..5f932b0 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -22,13 +22,13 @@
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CFG.h"
#include <cstdarg>
using namespace clang;
@@ -246,7 +246,7 @@
}
Value *VisitObjCMessageExpr(ObjCMessageExpr *E) {
if (E->getMethodDecl() &&
- E->getMethodDecl()->getResultType()->isReferenceType())
+ E->getMethodDecl()->getReturnType()->isReferenceType())
return EmitLoadOfLValue(E);
return CGF.EmitObjCMessageExpr(E).getScalarVal();
}
@@ -367,11 +367,8 @@
CGF.EmitCXXDeleteExpr(E);
return 0;
}
- Value *VisitUnaryTypeTraitExpr(const UnaryTypeTraitExpr *E) {
- return Builder.getInt1(E->getValue());
- }
- Value *VisitBinaryTypeTraitExpr(const BinaryTypeTraitExpr *E) {
+ Value *VisitTypeTraitExpr(const TypeTraitExpr *E) {
return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue());
}
@@ -1299,7 +1296,18 @@
case CK_AnyPointerToBlockPointerCast:
case CK_BitCast: {
Value *Src = Visit(const_cast<Expr*>(E));
- return Builder.CreateBitCast(Src, ConvertType(DestTy));
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = ConvertType(DestTy);
+ if (SrcTy->isPtrOrPtrVectorTy() && DstTy->isPtrOrPtrVectorTy() &&
+ SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) {
+ llvm::Type *MidTy = CGF.CGM.getDataLayout().getIntPtrType(SrcTy);
+ return Builder.CreateIntToPtr(Builder.CreatePtrToInt(Src, MidTy), DstTy);
+ }
+ return Builder.CreateBitCast(Src, DstTy);
+ }
+ case CK_AddressSpaceConversion: {
+ Value *Src = Visit(const_cast<Expr*>(E));
+ return Builder.CreateAddrSpaceCast(Src, ConvertType(DestTy));
}
case CK_AtomicToNonAtomic:
case CK_NonAtomicToAtomic:
@@ -1360,7 +1368,7 @@
// Make sure the array decay ends up being the right type. This matters if
// the array type was of an incomplete type.
- return CGF.Builder.CreateBitCast(V, ConvertType(CE->getType()));
+ return CGF.Builder.CreatePointerCast(V, ConvertType(CE->getType()));
}
case CK_FunctionToPointerDecay:
return EmitLValue(E).getAddress();
@@ -1485,7 +1493,7 @@
}
case CK_ZeroToOCLEvent: {
- assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non event type");
+ assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non-event type");
return llvm::Constant::getNullValue(ConvertType(DestTy));
}
@@ -1727,8 +1735,9 @@
if (atomicPHI) {
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
- llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
- CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
+ llvm::Value *old = Builder.CreateAtomicCmpXchg(
+ LV.getAddress(), atomicPHI, CGF.EmitToMemory(value, type),
+ llvm::SequentiallyConsistent, llvm::SequentiallyConsistent);
atomicPHI->addIncoming(old, opBB);
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
Builder.CreateCondBr(success, contBB, opBB);
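Both atomic compound-assignment hunks now pass a separate failure ordering to CreateAtomicCmpXchg, matching the updated IRBuilder signature. The same success/failure-ordering split exists in standard C++; a minimal analogue (not the LLVM API itself):

    #include <atomic>

    int main() {
      std::atomic<int> v{0};
      int expected = 0;
      // Distinct success and failure orderings, here both sequentially consistent,
      // mirroring the two llvm::SequentiallyConsistent arguments in the patch.
      bool ok = v.compare_exchange_strong(expected, 1,
                                          std::memory_order_seq_cst,
                                          std::memory_order_seq_cst);
      return ok ? 0 : 1;
    }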
@@ -1906,7 +1915,7 @@
QualType eltType;
llvm::Value *numElts;
- llvm::tie(numElts, eltType) = CGF.getVLASize(VAT);
+ std::tie(numElts, eltType) = CGF.getVLASize(VAT);
llvm::Value *size = numElts;
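llvm::tie is replaced by std::tie here and in several spots below. A minimal sketch of the idiom, with a hypothetical getVLASize stand-in returning a std::pair:

    #include <tuple>
    #include <utility>

    // Hypothetical stand-in for CGF.getVLASize(): (element count, element name).
    static std::pair<unsigned, const char *> getVLASize() { return {16, "int"}; }

    int main() {
      unsigned numElts;
      const char *eltType;
      std::tie(numElts, eltType) = getVLASize();  // unpack into existing variables
      return (numElts == 16 && eltType) ? 0 : 1;
    }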
@@ -2069,8 +2078,9 @@
if (atomicPHI) {
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
- llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
- CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
+ llvm::Value *old = Builder.CreateAtomicCmpXchg(
+ LHSLV.getAddress(), atomicPHI, CGF.EmitToMemory(Result, LHSTy),
+ llvm::SequentiallyConsistent, llvm::SequentiallyConsistent);
atomicPHI->addIncoming(old, opBB);
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
Builder.CreateCondBr(success, contBB, opBB);
@@ -2238,7 +2248,7 @@
llvm::BasicBlock *initialBB = Builder.GetInsertBlock();
llvm::Function::iterator insertPt = initialBB;
llvm::BasicBlock *continueBB = CGF.createBasicBlock("nooverflow", CGF.CurFn,
- llvm::next(insertPt));
+ std::next(insertPt));
llvm::BasicBlock *overflowBB = CGF.createBasicBlock("overflow", CGF.CurFn);
Builder.CreateCondBr(overflow, overflowBB, continueBB);
@@ -2529,7 +2539,7 @@
if (const VariableArrayType *vla
= CGF.getContext().getAsVariableArrayType(elementType)) {
llvm::Value *numElements;
- llvm::tie(numElements, elementType) = CGF.getVLASize(vla);
+ std::tie(numElements, elementType) = CGF.getVLASize(vla);
divisor = numElements;
@@ -2818,11 +2828,11 @@
switch (E->getLHS()->getType().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
- llvm::tie(LHS, RHS) = CGF.EmitARCStoreStrong(E, Ignore);
+ std::tie(LHS, RHS) = CGF.EmitARCStoreStrong(E, Ignore);
break;
case Qualifiers::OCL_Autoreleasing:
- llvm::tie(LHS,RHS) = CGF.EmitARCStoreAutoreleasing(E);
+ std::tie(LHS, RHS) = CGF.EmitARCStoreAutoreleasing(E);
break;
case Qualifiers::OCL_Weak:
@@ -2866,8 +2876,12 @@
}
Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
+
// Perform vector logical and on comparisons with zero vectors.
if (E->getType()->isVectorType()) {
+ Cnt.beginRegion(Builder);
+
Value *LHS = Visit(E->getLHS());
Value *RHS = Visit(E->getRHS());
Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
@@ -2889,6 +2903,8 @@
bool LHSCondVal;
if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
if (LHSCondVal) { // If we have 1 && X, just emit X.
+ Cnt.beginRegion(Builder);
+
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
// ZExt result to int or bool.
return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "land.ext");
@@ -2905,7 +2921,7 @@
CodeGenFunction::ConditionalEvaluation eval(CGF);
// Branch on the LHS first. If it is false, go to the failure (cont) block.
- CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock);
+ CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock, Cnt.getCount());
// Any edges into the ContBlock are now from an (indeterminate number of)
// edges from this first condition. All of these values will be false. Start
@@ -2918,6 +2934,7 @@
eval.begin(CGF);
CGF.EmitBlock(RHSBlock);
+ Cnt.beginRegion(Builder);
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
eval.end(CGF);
@@ -2937,8 +2954,12 @@
}
Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
+
// Perform vector logical or on comparisons with zero vectors.
if (E->getType()->isVectorType()) {
+ Cnt.beginRegion(Builder);
+
Value *LHS = Visit(E->getLHS());
Value *RHS = Visit(E->getRHS());
Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
@@ -2960,6 +2981,8 @@
bool LHSCondVal;
if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
if (!LHSCondVal) { // If we have 0 || X, just emit X.
+ Cnt.beginRegion(Builder);
+
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
// ZExt result to int or bool.
return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "lor.ext");
@@ -2976,7 +2999,8 @@
CodeGenFunction::ConditionalEvaluation eval(CGF);
// Branch on the LHS first. If it is true, go to the success (cont) block.
- CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock);
+ CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock,
+ Cnt.getParentCount() - Cnt.getCount());
// Any edges into the ContBlock are now from an (indeterminate number of)
// edges from this first condition. All of these values will be true. Start
@@ -2991,6 +3015,7 @@
// Emit the RHS condition as a bool value.
CGF.EmitBlock(RHSBlock);
+ Cnt.beginRegion(Builder);
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
eval.end(CGF);
@@ -3041,6 +3066,7 @@
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
Expr *condExpr = E->getCond();
Expr *lhsExpr = E->getTrueExpr();
@@ -3055,6 +3081,8 @@
// If the dead side doesn't have labels we need, just emit the Live part.
if (!CGF.ContainsLabel(dead)) {
+ if (CondExprBool)
+ Cnt.beginRegion(Builder);
Value *Result = Visit(live);
// If the live part is a throw expression, it acts like it has a void
@@ -3071,6 +3099,8 @@
// the select function.
if (CGF.getLangOpts().OpenCL
&& condExpr->getType()->isVectorType()) {
+ Cnt.beginRegion(Builder);
+
llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
llvm::Value *LHS = Visit(lhsExpr);
llvm::Value *RHS = Visit(rhsExpr);
@@ -3114,6 +3144,8 @@
// safe to evaluate the LHS and RHS unconditionally.
if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) &&
isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) {
+ Cnt.beginRegion(Builder);
+
llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr);
llvm::Value *LHS = Visit(lhsExpr);
llvm::Value *RHS = Visit(rhsExpr);
@@ -3130,9 +3162,10 @@
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end");
CodeGenFunction::ConditionalEvaluation eval(CGF);
- CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock);
+ CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock, Cnt.getCount());
CGF.EmitBlock(LHSBlock);
+ Cnt.beginRegion(Builder);
eval.begin(CGF);
Value *LHS = Visit(lhsExpr);
eval.end(CGF);
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index 0bda053..f78bb0b 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -22,7 +22,7 @@
#include "clang/Basic/Diagnostic.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
using namespace clang;
@@ -79,10 +79,10 @@
RValue RV = EmitAnyExpr(SubExpr);
CallArgList Args;
Args.add(RV, ArgQT);
-
- RValue result = Runtime.GenerateMessageSend(*this, ReturnValueSlot(),
- BoxingMethod->getResultType(), Sel, Receiver, Args,
- ClassDecl, BoxingMethod);
+
+ RValue result = Runtime.GenerateMessageSend(
+ *this, ReturnValueSlot(), BoxingMethod->getReturnType(), Sel, Receiver,
+ Args, ClassDecl, BoxingMethod);
return Builder.CreateBitCast(result.getScalarVal(),
ConvertType(E->getType()));
}
@@ -186,12 +186,9 @@
llvm::Value *Receiver = Runtime.GetClass(*this, Class);
// Generate the message send.
- RValue result
- = Runtime.GenerateMessageSend(*this, ReturnValueSlot(),
- MethodWithObjects->getResultType(),
- Sel,
- Receiver, Args, Class,
- MethodWithObjects);
+ RValue result = Runtime.GenerateMessageSend(
+ *this, ReturnValueSlot(), MethodWithObjects->getReturnType(), Sel,
+ Receiver, Args, Class, MethodWithObjects);
// The above message send needs these objects, but in ARC they are
// passed in a buffer that is essentially __unsafe_unretained.
@@ -238,7 +235,7 @@
return Result;
if (!Method->hasRelatedResultType() ||
- CGF.getContext().hasSameType(ExpT, Method->getResultType()) ||
+ CGF.getContext().hasSameType(ExpT, Method->getReturnType()) ||
!Result.isScalar())
return Result;
@@ -369,8 +366,7 @@
shouldExtendReceiverForInnerPointerMessage(E))
Receiver = EmitARCRetainAutorelease(ReceiverType, Receiver);
- QualType ResultType =
- method ? method->getResultType() : E->getType();
+ QualType ResultType = method ? method->getReturnType() : E->getType();
CallArgList Args;
EmitCallArgs(Args, method, E->arg_begin(), E->arg_end());
@@ -435,7 +431,7 @@
namespace {
struct FinishARCDealloc : EHScopeStack::Cleanup {
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
const ObjCMethodDecl *method = cast<ObjCMethodDecl>(CGF.CurCodeDecl);
const ObjCImplDecl *impl = cast<ObjCImplDecl>(method->getDeclContext());
@@ -480,13 +476,12 @@
args.push_back(OMD->getSelfDecl());
args.push_back(OMD->getCmdDecl());
- for (ObjCMethodDecl::param_const_iterator PI = OMD->param_begin(),
- E = OMD->param_end(); PI != E; ++PI)
- args.push_back(*PI);
+ for (const auto *PI : OMD->params())
+ args.push_back(PI);
CurGD = OMD;
- StartFunction(OMD, OMD->getResultType(), Fn, FI, args, StartLoc);
+ StartFunction(OMD, OMD->getReturnType(), Fn, FI, args, StartLoc);
// In ARC, certain methods get an extra cleanup.
if (CGM.getLangOpts().ObjCAutoRefCount &&
@@ -506,8 +501,14 @@
/// its pointer, name, and types registered in the class structure.
void CodeGenFunction::GenerateObjCMethod(const ObjCMethodDecl *OMD) {
StartObjCMethod(OMD, OMD->getClassInterface(), OMD->getLocStart());
- EmitStmt(OMD->getBody());
+ PGO.assignRegionCounters(OMD, CurFn);
+ assert(isa<CompoundStmt>(OMD->getBody()));
+ RegionCounter Cnt = getPGORegionCounter(OMD->getBody());
+ Cnt.beginRegion(Builder);
+ EmitCompoundStmtWithoutScope(*cast<CompoundStmt>(OMD->getBody()));
FinishFunction(OMD->getBodyRBrace());
+ PGO.emitInstrumentationData();
+ PGO.destroyRegionCounters();
}
/// emitStructGetterCall - Call the runtime function to load a property
@@ -622,8 +623,8 @@
// Evaluate the ivar's size and alignment.
ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl();
QualType ivarType = ivar->getType();
- llvm::tie(IvarSize, IvarAlignment)
- = CGM.getContext().getTypeInfoInChars(ivarType);
+ std::tie(IvarSize, IvarAlignment) =
+ CGM.getContext().getTypeInfoInChars(ivarType);
// If we have a copy property, we always have to use getProperty/setProperty.
// TODO: we could actually use setProperty and an expression for non-atomics.
@@ -895,16 +896,21 @@
// FIXME: We shouldn't need to get the function info here, the
// runtime already should have computed it to build the function.
+ llvm::Instruction *CallInstruction;
RValue RV = EmitCall(getTypes().arrangeFreeFunctionCall(propType, args,
FunctionType::ExtInfo(),
RequiredArgs::All),
- getPropertyFn, ReturnValueSlot(), args);
+ getPropertyFn, ReturnValueSlot(), args, 0,
+ &CallInstruction);
+ if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction))
+ call->setTailCall();
// We need to fix the type here. Ivars with copy & retain are
// always objects so we don't need to worry about complex or
// aggregates.
- RV = RValue::get(Builder.CreateBitCast(RV.getScalarVal(),
- getTypes().ConvertType(getterMethod->getResultType())));
+ RV = RValue::get(Builder.CreateBitCast(
+ RV.getScalarVal(),
+ getTypes().ConvertType(getterMethod->getReturnType())));
EmitReturnOfRValue(RV, propType);
@@ -955,8 +961,8 @@
}
value = Builder.CreateBitCast(value, ConvertType(propType));
- value = Builder.CreateBitCast(value,
- ConvertType(GetterMethodDecl->getResultType()));
+ value = Builder.CreateBitCast(
+ value, ConvertType(GetterMethodDecl->getReturnType()));
}
EmitReturnOfRValue(RValue::get(value), propType);
@@ -1292,7 +1298,7 @@
: addr(addr), ivar(ivar), destroyer(destroyer),
useEHCleanupForArray(useEHCleanupForArray) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
LValue lvalue
= CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), addr, ivar, /*CVR*/ 0);
CGF.emitDestroy(lvalue.getAddress(), ivar->getType(), destroyer,
@@ -1356,12 +1362,9 @@
// Suppress the final autorelease in ARC.
AutoreleaseResult = false;
- SmallVector<CXXCtorInitializer *, 8> IvarInitializers;
- for (ObjCImplementationDecl::init_const_iterator B = IMP->init_begin(),
- E = IMP->init_end(); B != E; ++B) {
- CXXCtorInitializer *IvarInit = (*B);
+ for (const auto *IvarInit : IMP->inits()) {
FieldDecl *Field = IvarInit->getAnyMember();
- ObjCIvarDecl *Ivar = cast<ObjCIvarDecl>(Field);
+ ObjCIvarDecl *Ivar = cast<ObjCIvarDecl>(Field);
LValue LV = EmitLValueForIvar(TypeOfSelfObject(),
LoadObjCSelf(), Ivar, 0);
EmitAggExpr(IvarInit->getInit(),
@@ -1506,9 +1509,13 @@
llvm::Value *zero = llvm::Constant::getNullValue(UnsignedLongLTy);
// If the limit pointer was zero to begin with, the collection is
- // empty; skip all this.
+ // empty; skip all this. Set the branch weight assuming this has the same
+ // probability of exiting the loop as any other loop exit.
+ uint64_t EntryCount = PGO.getCurrentRegionCount();
+ RegionCounter Cnt = getPGORegionCounter(&S);
Builder.CreateCondBr(Builder.CreateICmpEQ(initialBufferLimit, zero, "iszero"),
- EmptyBB, LoopInitBB);
+ EmptyBB, LoopInitBB,
+ PGO.createBranchWeights(EntryCount, Cnt.getCount()));
// Otherwise, initialize the loop.
EmitBlock(LoopInitBB);
@@ -1537,6 +1544,8 @@
llvm::PHINode *count = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.count");
count->addIncoming(initialBufferLimit, LoopInitBB);
+ Cnt.beginRegion(Builder);
+
// Check whether the mutations value has changed from where it was
// at start. StateMutationsPtr should actually be invariant between
// refreshes.
@@ -1644,8 +1653,12 @@
= Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1));
// If we haven't overrun the buffer yet, we can continue.
+ // Set the branch weights based on the simplifying assumption that this is
+ // like a while-loop, i.e., ignoring that the false branch fetches more
+ // elements and then returns to the loop.
Builder.CreateCondBr(Builder.CreateICmpULT(indexPlusOne, count),
- LoopBodyBB, FetchMoreBB);
+ LoopBodyBB, FetchMoreBB,
+ PGO.createBranchWeights(Cnt.getCount(), EntryCount));
index->addIncoming(indexPlusOne, AfterBody.getBlock());
count->addIncoming(count, AfterBody.getBlock());
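The two fast-enumeration hunks above attach branch weights derived from the PGO counts, under the stated while-loop heuristic. A plausible sketch of the metadata such weights become at the IR level, using LLVM's MDBuilder; the concrete numbers and Clang's own createBranchWeights wrapper are assumptions, not taken from the patch:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"

    int main() {
      llvm::LLVMContext Ctx;
      llvm::MDBuilder MDB(Ctx);
      // e.g. loop body observed 100 times for every 1 loop entry/exit.
      llvm::MDNode *Weights =
          MDB.createBranchWeights(/*TrueWeight=*/100, /*FalseWeight=*/1);
      return Weights ? 0 : 1;
    }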
@@ -1715,7 +1728,7 @@
CallObjCRelease(llvm::Value *object) : object(object) {}
llvm::Value *object;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
// Releases at the end of the full-expression are imprecise.
CGF.EmitARCRelease(object, ARCImpreciseLifetime);
}
@@ -2324,7 +2337,7 @@
CallObjCAutoreleasePoolObject(llvm::Value *token) : Token(token) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitObjCAutoreleasePoolPop(Token);
}
};
@@ -2333,7 +2346,7 @@
CallObjCMRRAutoreleasePoolObject(llvm::Value *token) : Token(token) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitObjCMRRAutoreleasePoolPop(Token);
}
};
@@ -2824,9 +2837,8 @@
EHStack.pushCleanup<CallObjCMRRAutoreleasePoolObject>(NormalCleanup, token);
}
- for (CompoundStmt::const_body_iterator I = S.body_begin(),
- E = S.body_end(); I != E; ++I)
- EmitStmt(*I);
+ for (const auto *I : S.body())
+ EmitStmt(I);
if (DI)
DI->EmitLexicalBlockEnd(Builder, S.getRBracLoc());
@@ -2892,12 +2904,10 @@
args.push_back(&dstDecl);
ImplicitParamDecl srcDecl(FD, SourceLocation(), 0, SrcTy);
args.push_back(&srcDecl);
-
- const CGFunctionInfo &FI =
- CGM.getTypes().arrangeFunctionDeclaration(C.VoidTy, args,
- FunctionType::ExtInfo(),
- RequiredArgs::All);
-
+
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+ C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All);
+
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn =
@@ -2973,12 +2983,10 @@
args.push_back(&dstDecl);
ImplicitParamDecl srcDecl(FD, SourceLocation(), 0, SrcTy);
args.push_back(&srcDecl);
-
- const CGFunctionInfo &FI =
- CGM.getTypes().arrangeFunctionDeclaration(C.VoidTy, args,
- FunctionType::ExtInfo(),
- RequiredArgs::All);
-
+
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+ C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All);
+
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn =
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index a7ab850..2689d7b 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -27,11 +27,11 @@
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
#include <cstdarg>
@@ -479,102 +479,91 @@
CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
unsigned protocolClassVersion);
- virtual llvm::Constant *GenerateConstantString(const StringLiteral *);
+ llvm::Constant *GenerateConstantString(const StringLiteral *) override;
- virtual RValue
- GenerateMessageSend(CodeGenFunction &CGF,
- ReturnValueSlot Return,
- QualType ResultType,
- Selector Sel,
- llvm::Value *Receiver,
- const CallArgList &CallArgs,
+ RValue
+ GenerateMessageSend(CodeGenFunction &CGF, ReturnValueSlot Return,
+ QualType ResultType, Selector Sel,
+ llvm::Value *Receiver, const CallArgList &CallArgs,
const ObjCInterfaceDecl *Class,
- const ObjCMethodDecl *Method);
- virtual RValue
- GenerateMessageSendSuper(CodeGenFunction &CGF,
- ReturnValueSlot Return,
- QualType ResultType,
- Selector Sel,
+ const ObjCMethodDecl *Method) override;
+ RValue
+ GenerateMessageSendSuper(CodeGenFunction &CGF, ReturnValueSlot Return,
+ QualType ResultType, Selector Sel,
const ObjCInterfaceDecl *Class,
- bool isCategoryImpl,
- llvm::Value *Receiver,
- bool IsClassMessage,
- const CallArgList &CallArgs,
- const ObjCMethodDecl *Method);
- virtual llvm::Value *GetClass(CodeGenFunction &CGF,
- const ObjCInterfaceDecl *OID);
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
- bool lval = false);
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl
- *Method);
- virtual llvm::Constant *GetEHType(QualType T);
+ bool isCategoryImpl, llvm::Value *Receiver,
+ bool IsClassMessage, const CallArgList &CallArgs,
+ const ObjCMethodDecl *Method) override;
+ llvm::Value *GetClass(CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *OID) override;
+ llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
+ bool lval = false) override;
+ llvm::Value *GetSelector(CodeGenFunction &CGF,
+ const ObjCMethodDecl *Method) override;
+ llvm::Constant *GetEHType(QualType T) override;
- virtual llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD,
- const ObjCContainerDecl *CD);
- virtual void GenerateCategory(const ObjCCategoryImplDecl *CMD);
- virtual void GenerateClass(const ObjCImplementationDecl *ClassDecl);
- virtual void RegisterAlias(const ObjCCompatibleAliasDecl *OAD);
- virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
- const ObjCProtocolDecl *PD);
- virtual void GenerateProtocol(const ObjCProtocolDecl *PD);
- virtual llvm::Function *ModuleInitFunction();
- virtual llvm::Constant *GetPropertyGetFunction();
- virtual llvm::Constant *GetPropertySetFunction();
- virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
- bool copy);
- virtual llvm::Constant *GetSetStructFunction();
- virtual llvm::Constant *GetGetStructFunction();
- virtual llvm::Constant *GetCppAtomicObjectGetFunction();
- virtual llvm::Constant *GetCppAtomicObjectSetFunction();
- virtual llvm::Constant *EnumerationMutationFunction();
+ llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD,
+ const ObjCContainerDecl *CD) override;
+ void GenerateCategory(const ObjCCategoryImplDecl *CMD) override;
+ void GenerateClass(const ObjCImplementationDecl *ClassDecl) override;
+ void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) override;
+ llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
+ const ObjCProtocolDecl *PD) override;
+ void GenerateProtocol(const ObjCProtocolDecl *PD) override;
+ llvm::Function *ModuleInitFunction() override;
+ llvm::Constant *GetPropertyGetFunction() override;
+ llvm::Constant *GetPropertySetFunction() override;
+ llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
+ bool copy) override;
+ llvm::Constant *GetSetStructFunction() override;
+ llvm::Constant *GetGetStructFunction() override;
+ llvm::Constant *GetCppAtomicObjectGetFunction() override;
+ llvm::Constant *GetCppAtomicObjectSetFunction() override;
+ llvm::Constant *EnumerationMutationFunction() override;
- virtual void EmitTryStmt(CodeGenFunction &CGF,
- const ObjCAtTryStmt &S);
- virtual void EmitSynchronizedStmt(CodeGenFunction &CGF,
- const ObjCAtSynchronizedStmt &S);
- virtual void EmitThrowStmt(CodeGenFunction &CGF,
- const ObjCAtThrowStmt &S,
- bool ClearInsertionPoint=true);
- virtual llvm::Value * EmitObjCWeakRead(CodeGenFunction &CGF,
- llvm::Value *AddrWeakObj);
- virtual void EmitObjCWeakAssign(CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dst);
- virtual void EmitObjCGlobalAssign(CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dest,
- bool threadlocal=false);
- virtual void EmitObjCIvarAssign(CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dest,
- llvm::Value *ivarOffset);
- virtual void EmitObjCStrongCastAssign(CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dest);
- virtual void EmitGCMemmoveCollectable(CodeGenFunction &CGF,
- llvm::Value *DestPtr,
- llvm::Value *SrcPtr,
- llvm::Value *Size);
- virtual LValue EmitObjCValueForIvar(CodeGenFunction &CGF,
- QualType ObjectTy,
- llvm::Value *BaseValue,
- const ObjCIvarDecl *Ivar,
- unsigned CVRQualifiers);
- virtual llvm::Value *EmitIvarOffset(CodeGenFunction &CGF,
- const ObjCInterfaceDecl *Interface,
- const ObjCIvarDecl *Ivar);
- virtual llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF);
- virtual llvm::Constant *BuildGCBlockLayout(CodeGenModule &CGM,
- const CGBlockInfo &blockInfo) {
+ void EmitTryStmt(CodeGenFunction &CGF,
+ const ObjCAtTryStmt &S) override;
+ void EmitSynchronizedStmt(CodeGenFunction &CGF,
+ const ObjCAtSynchronizedStmt &S) override;
+ void EmitThrowStmt(CodeGenFunction &CGF,
+ const ObjCAtThrowStmt &S,
+ bool ClearInsertionPoint=true) override;
+ llvm::Value * EmitObjCWeakRead(CodeGenFunction &CGF,
+ llvm::Value *AddrWeakObj) override;
+ void EmitObjCWeakAssign(CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dst) override;
+ void EmitObjCGlobalAssign(CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dest,
+ bool threadlocal=false) override;
+ void EmitObjCIvarAssign(CodeGenFunction &CGF, llvm::Value *src,
+ llvm::Value *dest, llvm::Value *ivarOffset) override;
+ void EmitObjCStrongCastAssign(CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dest) override;
+ void EmitGCMemmoveCollectable(CodeGenFunction &CGF, llvm::Value *DestPtr,
+ llvm::Value *SrcPtr,
+ llvm::Value *Size) override;
+ LValue EmitObjCValueForIvar(CodeGenFunction &CGF, QualType ObjectTy,
+ llvm::Value *BaseValue, const ObjCIvarDecl *Ivar,
+ unsigned CVRQualifiers) override;
+ llvm::Value *EmitIvarOffset(CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *Interface,
+ const ObjCIvarDecl *Ivar) override;
+ llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) override;
+ llvm::Constant *BuildGCBlockLayout(CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) override {
return NULLPtr;
}
- virtual llvm::Constant *BuildRCBlockLayout(CodeGenModule &CGM,
- const CGBlockInfo &blockInfo) {
+ llvm::Constant *BuildRCBlockLayout(CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) override {
return NULLPtr;
}
-
- virtual llvm::Constant *BuildByrefLayout(CodeGenModule &CGM,
- QualType T) {
+
+ llvm::Constant *BuildByrefLayout(CodeGenModule &CGM, QualType T) override {
return NULLPtr;
}
-
- virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name) {
+
+ llvm::GlobalVariable *GetClassGlobal(const std::string &Name,
+ bool Weak = false) override {
return 0;
}
};
@@ -595,11 +584,9 @@
/// arguments. Returns the IMP for the corresponding method.
LazyRuntimeFunction MsgLookupSuperFn;
protected:
- virtual llvm::Value *LookupIMP(CodeGenFunction &CGF,
- llvm::Value *&Receiver,
- llvm::Value *cmd,
- llvm::MDNode *node,
- MessageSendInfo &MSI) {
+ llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver,
+ llvm::Value *cmd, llvm::MDNode *node,
+ MessageSendInfo &MSI) override {
CGBuilderTy &Builder = CGF.Builder;
llvm::Value *args[] = {
EnforceType(Builder, Receiver, IdTy),
@@ -608,10 +595,8 @@
imp->setMetadata(msgSendMDKind, node);
return imp.getInstruction();
}
- virtual llvm::Value *LookupIMPSuper(CodeGenFunction &CGF,
- llvm::Value *ObjCSuper,
- llvm::Value *cmd,
- MessageSendInfo &MSI) {
+ llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, llvm::Value *ObjCSuper,
+ llvm::Value *cmd, MessageSendInfo &MSI) override {
CGBuilderTy &Builder = CGF.Builder;
llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper,
PtrToObjCSuperTy), cmd};
@@ -654,13 +639,11 @@
/// lookup functions.
llvm::Type *SlotTy;
public:
- virtual llvm::Constant *GetEHType(QualType T);
+ llvm::Constant *GetEHType(QualType T) override;
protected:
- virtual llvm::Value *LookupIMP(CodeGenFunction &CGF,
- llvm::Value *&Receiver,
- llvm::Value *cmd,
- llvm::MDNode *node,
- MessageSendInfo &MSI) {
+ llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver,
+ llvm::Value *cmd, llvm::MDNode *node,
+ MessageSendInfo &MSI) override {
CGBuilderTy &Builder = CGF.Builder;
llvm::Function *LookupFn = SlotLookupFn;
@@ -696,10 +679,9 @@
Receiver = Builder.CreateLoad(ReceiverPtr, true);
return imp;
}
- virtual llvm::Value *LookupIMPSuper(CodeGenFunction &CGF,
- llvm::Value *ObjCSuper,
- llvm::Value *cmd,
- MessageSendInfo &MSI) {
+ llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, llvm::Value *ObjCSuper,
+ llvm::Value *cmd,
+ MessageSendInfo &MSI) override {
CGBuilderTy &Builder = CGF.Builder;
llvm::Value *lookupArgs[] = {ObjCSuper, cmd};
@@ -760,22 +742,22 @@
CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy,
PtrTy, PtrTy, NULL);
}
- virtual llvm::Constant *GetCppAtomicObjectGetFunction() {
+ llvm::Constant *GetCppAtomicObjectGetFunction() override {
// The optimised functions were added in version 1.7 of the GNUstep
// runtime.
assert (CGM.getLangOpts().ObjCRuntime.getVersion() >=
VersionTuple(1, 7));
return CxxAtomicObjectGetFn;
}
- virtual llvm::Constant *GetCppAtomicObjectSetFunction() {
+ llvm::Constant *GetCppAtomicObjectSetFunction() override {
// The optimised functions were added in version 1.7 of the GNUstep
// runtime.
assert (CGM.getLangOpts().ObjCRuntime.getVersion() >=
VersionTuple(1, 7));
return CxxAtomicObjectSetFn;
}
- virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
- bool copy) {
+ llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
+ bool copy) override {
// The optimised property functions omit the GC check, and so are not
// safe to use in GC mode. The standard functions are fast in GC mode,
// so there is less advantage in using them.
@@ -789,10 +771,8 @@
if (copy) return SetPropertyAtomicCopy;
return SetPropertyAtomic;
}
- if (copy) return SetPropertyNonAtomicCopy;
- return SetPropertyNonAtomic;
- return 0;
+ return copy ? SetPropertyNonAtomicCopy : SetPropertyNonAtomic;
}
};
@@ -810,11 +790,9 @@
/// arguments. Returns the IMP for the corresponding method.
LazyRuntimeFunction MsgLookupSuperFn, MsgLookupSuperFnSRet;
- virtual llvm::Value *LookupIMP(CodeGenFunction &CGF,
- llvm::Value *&Receiver,
- llvm::Value *cmd,
- llvm::MDNode *node,
- MessageSendInfo &MSI) {
+ llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver,
+ llvm::Value *cmd, llvm::MDNode *node,
+ MessageSendInfo &MSI) override {
CGBuilderTy &Builder = CGF.Builder;
llvm::Value *args[] = {
EnforceType(Builder, Receiver, IdTy),
@@ -830,10 +808,8 @@
return imp.getInstruction();
}
- virtual llvm::Value *LookupIMPSuper(CodeGenFunction &CGF,
- llvm::Value *ObjCSuper,
- llvm::Value *cmd,
- MessageSendInfo &MSI) {
+ llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, llvm::Value *ObjCSuper,
+ llvm::Value *cmd, MessageSendInfo &MSI) override {
CGBuilderTy &Builder = CGF.Builder;
llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper,
PtrToObjCSuperTy), cmd};
@@ -844,8 +820,8 @@
return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs);
}
- virtual llvm::Value *GetClassNamed(CodeGenFunction &CGF,
- const std::string &Name, bool isWeak) {
+ llvm::Value *GetClassNamed(CodeGenFunction &CGF,
+ const std::string &Name, bool isWeak) override {
if (isWeak)
return CGObjCGNU::GetClassNamed(CGF, Name, isWeak);
@@ -948,7 +924,7 @@
Int64Ty = llvm::Type::getInt64Ty(VMContext);
IntPtrTy =
- TheModule.getPointerSize() == llvm::Module::Pointer32 ? Int32Ty : Int64Ty;
+ CGM.getDataLayout().getPointerSizeInBits() == 32 ? Int32Ty : Int64Ty;
// Object type
QualType UnqualIdTy = CGM.getContext().getObjCIdType();
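CGObjCGNU now sizes IntPtrTy from the DataLayout instead of Module::getPointerSize(). A hedged sketch of the query (the layout string is an illustrative 64-bit description, not taken from the patch):

    #include "llvm/IR/DataLayout.h"

    int main() {
      llvm::DataLayout DL("e-p:64:64:64");
      // IntPtrTy selection and MakeBitField's ptrBits now key off this value.
      return DL.getPointerSizeInBits() == 64 ? 0 : 1;
    }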
@@ -1779,24 +1755,22 @@
PD = Def;
SmallVector<std::string, 16> Protocols;
- for (ObjCProtocolDecl::protocol_iterator PI = PD->protocol_begin(),
- E = PD->protocol_end(); PI != E; ++PI)
- Protocols.push_back((*PI)->getNameAsString());
+ for (const auto *PI : PD->protocols())
+ Protocols.push_back(PI->getNameAsString());
SmallVector<llvm::Constant*, 16> InstanceMethodNames;
SmallVector<llvm::Constant*, 16> InstanceMethodTypes;
SmallVector<llvm::Constant*, 16> OptionalInstanceMethodNames;
SmallVector<llvm::Constant*, 16> OptionalInstanceMethodTypes;
- for (ObjCProtocolDecl::instmeth_iterator iter = PD->instmeth_begin(),
- E = PD->instmeth_end(); iter != E; iter++) {
+ for (const auto *I : PD->instance_methods()) {
std::string TypeStr;
- Context.getObjCEncodingForMethodDecl(*iter, TypeStr);
- if ((*iter)->getImplementationControl() == ObjCMethodDecl::Optional) {
+ Context.getObjCEncodingForMethodDecl(I, TypeStr);
+ if (I->getImplementationControl() == ObjCMethodDecl::Optional) {
OptionalInstanceMethodNames.push_back(
- MakeConstantString((*iter)->getSelector().getAsString()));
+ MakeConstantString(I->getSelector().getAsString()));
OptionalInstanceMethodTypes.push_back(MakeConstantString(TypeStr));
} else {
InstanceMethodNames.push_back(
- MakeConstantString((*iter)->getSelector().getAsString()));
+ MakeConstantString(I->getSelector().getAsString()));
InstanceMethodTypes.push_back(MakeConstantString(TypeStr));
}
}
@@ -1805,18 +1779,16 @@
SmallVector<llvm::Constant*, 16> ClassMethodTypes;
SmallVector<llvm::Constant*, 16> OptionalClassMethodNames;
SmallVector<llvm::Constant*, 16> OptionalClassMethodTypes;
- for (ObjCProtocolDecl::classmeth_iterator
- iter = PD->classmeth_begin(), endIter = PD->classmeth_end();
- iter != endIter ; iter++) {
+ for (const auto *I : PD->class_methods()) {
std::string TypeStr;
- Context.getObjCEncodingForMethodDecl((*iter),TypeStr);
- if ((*iter)->getImplementationControl() == ObjCMethodDecl::Optional) {
+ Context.getObjCEncodingForMethodDecl(I,TypeStr);
+ if (I->getImplementationControl() == ObjCMethodDecl::Optional) {
OptionalClassMethodNames.push_back(
- MakeConstantString((*iter)->getSelector().getAsString()));
+ MakeConstantString(I->getSelector().getAsString()));
OptionalClassMethodTypes.push_back(MakeConstantString(TypeStr));
} else {
ClassMethodNames.push_back(
- MakeConstantString((*iter)->getSelector().getAsString()));
+ MakeConstantString(I->getSelector().getAsString()));
ClassMethodTypes.push_back(MakeConstantString(TypeStr));
}
}
@@ -1846,11 +1818,8 @@
// Add all of the property methods that need adding to the method list and to the
// property metadata list.
- for (ObjCContainerDecl::prop_iterator
- iter = PD->prop_begin(), endIter = PD->prop_end();
- iter != endIter ; iter++) {
+ for (auto *property : PD->properties()) {
std::vector<llvm::Constant*> Fields;
- ObjCPropertyDecl *property = *iter;
Fields.push_back(MakePropertyEncodingString(property, 0));
PushPropertyAttributes(Fields, property);
@@ -1996,8 +1965,7 @@
/// bitfield / with the 63rd bit set will be 1<<64.
llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) {
int bitCount = bits.size();
- int ptrBits =
- (TheModule.getPointerSize() == llvm::Module::Pointer32) ? 32 : 64;
+ int ptrBits = CGM.getDataLayout().getPointerSizeInBits();
if (bitCount < ptrBits) {
uint64_t val = 1;
for (int i=0 ; i<bitCount ; ++i) {
@@ -2032,24 +2000,20 @@
// Collect information about instance methods
SmallVector<Selector, 16> InstanceMethodSels;
SmallVector<llvm::Constant*, 16> InstanceMethodTypes;
- for (ObjCCategoryImplDecl::instmeth_iterator
- iter = OCD->instmeth_begin(), endIter = OCD->instmeth_end();
- iter != endIter ; iter++) {
- InstanceMethodSels.push_back((*iter)->getSelector());
+ for (const auto *I : OCD->instance_methods()) {
+ InstanceMethodSels.push_back(I->getSelector());
std::string TypeStr;
- CGM.getContext().getObjCEncodingForMethodDecl(*iter,TypeStr);
+ CGM.getContext().getObjCEncodingForMethodDecl(I,TypeStr);
InstanceMethodTypes.push_back(MakeConstantString(TypeStr));
}
// Collect information about class methods
SmallVector<Selector, 16> ClassMethodSels;
SmallVector<llvm::Constant*, 16> ClassMethodTypes;
- for (ObjCCategoryImplDecl::classmeth_iterator
- iter = OCD->classmeth_begin(), endIter = OCD->classmeth_end();
- iter != endIter ; iter++) {
- ClassMethodSels.push_back((*iter)->getSelector());
+ for (const auto *I : OCD->class_methods()) {
+ ClassMethodSels.push_back(I->getSelector());
std::string TypeStr;
- CGM.getContext().getObjCEncodingForMethodDecl(*iter,TypeStr);
+ CGM.getContext().getObjCEncodingForMethodDecl(I,TypeStr);
ClassMethodTypes.push_back(MakeConstantString(TypeStr));
}
@@ -2093,12 +2057,9 @@
// Add all of the property methods that need adding to the method list and to the
// property metadata list.
- for (ObjCImplDecl::propimpl_iterator
- iter = OID->propimpl_begin(), endIter = OID->propimpl_end();
- iter != endIter ; iter++) {
+ for (auto *propertyImpl : OID->property_impls()) {
std::vector<llvm::Constant*> Fields;
- ObjCPropertyDecl *property = iter->getPropertyDecl();
- ObjCPropertyImplDecl *propertyImpl = *iter;
+ ObjCPropertyDecl *property = propertyImpl->getPropertyDecl();
bool isSynthesized = (propertyImpl->getPropertyImplementation() ==
ObjCPropertyImplDecl::Synthesize);
bool isDynamic = (propertyImpl->getPropertyImplementation() ==
@@ -2265,12 +2226,10 @@
// Collect information about instance methods
SmallVector<Selector, 16> InstanceMethodSels;
SmallVector<llvm::Constant*, 16> InstanceMethodTypes;
- for (ObjCImplementationDecl::instmeth_iterator
- iter = OID->instmeth_begin(), endIter = OID->instmeth_end();
- iter != endIter ; iter++) {
- InstanceMethodSels.push_back((*iter)->getSelector());
+ for (const auto *I : OID->instance_methods()) {
+ InstanceMethodSels.push_back(I->getSelector());
std::string TypeStr;
- Context.getObjCEncodingForMethodDecl((*iter),TypeStr);
+ Context.getObjCEncodingForMethodDecl(I,TypeStr);
InstanceMethodTypes.push_back(MakeConstantString(TypeStr));
}
@@ -2281,22 +2240,16 @@
// Collect information about class methods
SmallVector<Selector, 16> ClassMethodSels;
SmallVector<llvm::Constant*, 16> ClassMethodTypes;
- for (ObjCImplementationDecl::classmeth_iterator
- iter = OID->classmeth_begin(), endIter = OID->classmeth_end();
- iter != endIter ; iter++) {
- ClassMethodSels.push_back((*iter)->getSelector());
+ for (const auto *I : OID->class_methods()) {
+ ClassMethodSels.push_back(I->getSelector());
std::string TypeStr;
- Context.getObjCEncodingForMethodDecl((*iter),TypeStr);
+ Context.getObjCEncodingForMethodDecl(I,TypeStr);
ClassMethodTypes.push_back(MakeConstantString(TypeStr));
}
// Collect the names of referenced protocols
SmallVector<std::string, 16> Protocols;
- for (ObjCInterfaceDecl::protocol_iterator
- I = ClassDecl->protocol_begin(),
- E = ClassDecl->protocol_end(); I != E; ++I)
- Protocols.push_back((*I)->getNameAsString());
-
-
+ for (const auto *I : ClassDecl->protocols())
+ Protocols.push_back(I->getNameAsString());
// Get the superclass pointer.
llvm::Constant *SuperClass;
@@ -2595,7 +2548,7 @@
llvm::Constant::getNullValue(RegisterAlias->getType()));
Builder.CreateCondBr(HasRegisterAlias, AliasBB, NoAliasBB);
- // The true branch (has alias registration fucntion):
+ // The true branch (has alias registration function):
Builder.SetInsertPoint(AliasBB);
// Emit alias registration calls:
for (std::vector<ClassAliasPair>::iterator iter = ClassAliases.begin();
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index 2b2a5b8..8f5969c 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -29,12 +29,12 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
@@ -174,6 +174,7 @@
public:
llvm::Type *ShortTy, *IntTy, *LongTy, *LongLongTy;
llvm::Type *Int8PtrTy, *Int8PtrPtrTy;
+ llvm::Type *IvarOffsetVarTy;
/// ObjectPtrTy - LLVM type for object handles (typeof(id))
llvm::Type *ObjectPtrTy;
@@ -243,8 +244,8 @@
Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified());
Params.push_back(Ctx.BoolTy);
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(IdType, Params,
- FunctionType::ExtInfo(),
+ Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(IdType, false, Params,
+ FunctionType::ExtInfo(),
RequiredArgs::All));
return CGM.CreateRuntimeFunction(FTy, "objc_getProperty");
}
@@ -263,8 +264,9 @@
Params.push_back(Ctx.BoolTy);
Params.push_back(Ctx.BoolTy);
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params,
- FunctionType::ExtInfo(),
+ Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
+ Params,
+ FunctionType::ExtInfo(),
RequiredArgs::All));
return CGM.CreateRuntimeFunction(FTy, "objc_setProperty");
}
@@ -289,7 +291,8 @@
Params.push_back(IdType);
Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified());
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params,
+ Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
+ Params,
FunctionType::ExtInfo(),
RequiredArgs::All));
const char *name;
@@ -316,8 +319,9 @@
Params.push_back(Ctx.BoolTy);
Params.push_back(Ctx.BoolTy);
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params,
- FunctionType::ExtInfo(),
+ Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
+ Params,
+ FunctionType::ExtInfo(),
RequiredArgs::All));
return CGM.CreateRuntimeFunction(FTy, "objc_copyStruct");
}
@@ -335,8 +339,9 @@
Params.push_back(Ctx.VoidPtrTy);
Params.push_back(Ctx.VoidPtrTy);
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params,
- FunctionType::ExtInfo(),
+ Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
+ Params,
+ FunctionType::ExtInfo(),
RequiredArgs::All));
return CGM.CreateRuntimeFunction(FTy, "objc_copyCppObjectAtomic");
}
@@ -348,8 +353,9 @@
SmallVector<CanQualType,1> Params;
Params.push_back(Ctx.getCanonicalParamType(Ctx.getObjCIdType()));
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params,
- FunctionType::ExtInfo(),
+ Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
+ Params,
+ FunctionType::ExtInfo(),
RequiredArgs::All));
return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation");
}
@@ -885,6 +891,9 @@
/// DefinedClasses - List of defined classes.
SmallVector<llvm::GlobalValue*, 16> DefinedClasses;
+
+ /// ImplementedClasses - List of @implemented classes.
+ SmallVector<const ObjCInterfaceDecl*, 16> ImplementedClasses;
/// DefinedNonLazyClasses - List of defined "non-lazy" classes.
SmallVector<llvm::GlobalValue*, 16> DefinedNonLazyClasses;
@@ -991,7 +1000,7 @@
llvm::SmallPtrSet<const IdentifierInfo*, 16> &PropertySet,
SmallVectorImpl<llvm::Constant*> &Properties,
const Decl *Container,
- const ObjCProtocolDecl *PROTO,
+ const ObjCProtocolDecl *Proto,
const ObjCCommonTypesHelper &ObjCTypes);
/// GetProtocolRef - Return a reference to the internal protocol
@@ -1038,12 +1047,12 @@
CGObjCCommonMac(CodeGen::CodeGenModule &cgm) :
CGObjCRuntime(cgm), VMContext(cgm.getLLVMContext()) { }
- virtual llvm::Constant *GenerateConstantString(const StringLiteral *SL);
-
- virtual llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD,
- const ObjCContainerDecl *CD=0);
+ llvm::Constant *GenerateConstantString(const StringLiteral *SL) override;
- virtual void GenerateProtocol(const ObjCProtocolDecl *PD);
+ llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD,
+ const ObjCContainerDecl *CD=0) override;
+
+ void GenerateProtocol(const ObjCProtocolDecl *PD) override;
/// GetOrEmitProtocol - Get the protocol object for the given
/// declaration, emitting it if necessary. The return value has type
@@ -1055,13 +1064,13 @@
/// forward references will be filled in with empty bodies if no
/// definition is seen. The return value has type ProtocolPtrTy.
virtual llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD)=0;
- virtual llvm::Constant *BuildGCBlockLayout(CodeGen::CodeGenModule &CGM,
- const CGBlockInfo &blockInfo);
- virtual llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM,
- const CGBlockInfo &blockInfo);
-
- virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM,
- QualType T);
+ llvm::Constant *BuildGCBlockLayout(CodeGen::CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) override;
+ llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) override;
+
+ llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM,
+ QualType T) override;
};
class CGObjCMac : public CGObjCCommonMac {
@@ -1092,9 +1101,9 @@
llvm::Value *EmitClassRefFromId(CodeGenFunction &CGF,
IdentifierInfo *II);
-
- llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF);
-
+
+ llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) override;
+
/// EmitSuperClassRef - Emits reference to class's main metadata class.
llvm::Value *EmitSuperClassRef(const ObjCInterfaceDecl *ID);
@@ -1145,13 +1154,13 @@
/// GetOrEmitProtocol - Get the protocol object for the given
/// declaration, emitting it if necessary. The return value has type
/// ProtocolPtrTy.
- virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD);
+ llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) override;
/// GetOrEmitProtocolRef - Get a forward reference to the protocol
/// object for the given declaration, emitting it if needed. These
/// forward references will be filled in with empty bodies if no
/// definition is seen. The return value has type ProtocolPtrTy.
- virtual llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD);
+ llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) override;
/// EmitProtocolExtension - Generate the protocol extension
/// structure used to store optional instance and class methods, and
@@ -1177,97 +1186,90 @@
public:
CGObjCMac(CodeGen::CodeGenModule &cgm);
- virtual llvm::Function *ModuleInitFunction();
+ llvm::Function *ModuleInitFunction() override;
- virtual CodeGen::RValue GenerateMessageSend(CodeGen::CodeGenFunction &CGF,
- ReturnValueSlot Return,
- QualType ResultType,
- Selector Sel,
- llvm::Value *Receiver,
- const CallArgList &CallArgs,
- const ObjCInterfaceDecl *Class,
- const ObjCMethodDecl *Method);
+ CodeGen::RValue GenerateMessageSend(CodeGen::CodeGenFunction &CGF,
+ ReturnValueSlot Return,
+ QualType ResultType,
+ Selector Sel, llvm::Value *Receiver,
+ const CallArgList &CallArgs,
+ const ObjCInterfaceDecl *Class,
+ const ObjCMethodDecl *Method) override;
- virtual CodeGen::RValue
+ CodeGen::RValue
GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF,
- ReturnValueSlot Return,
- QualType ResultType,
- Selector Sel,
- const ObjCInterfaceDecl *Class,
- bool isCategoryImpl,
- llvm::Value *Receiver,
- bool IsClassMessage,
- const CallArgList &CallArgs,
- const ObjCMethodDecl *Method);
+ ReturnValueSlot Return, QualType ResultType,
+ Selector Sel, const ObjCInterfaceDecl *Class,
+ bool isCategoryImpl, llvm::Value *Receiver,
+ bool IsClassMessage, const CallArgList &CallArgs,
+ const ObjCMethodDecl *Method) override;
- virtual llvm::Value *GetClass(CodeGenFunction &CGF,
- const ObjCInterfaceDecl *ID);
+ llvm::Value *GetClass(CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *ID) override;
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
- bool lval = false);
+ llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
+ bool lval = false) override;
/// The NeXT/Apple runtimes do not support typed selectors; just emit an
/// untyped one.
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF,
- const ObjCMethodDecl *Method);
+ llvm::Value *GetSelector(CodeGenFunction &CGF,
+ const ObjCMethodDecl *Method) override;
- virtual llvm::Constant *GetEHType(QualType T);
+ llvm::Constant *GetEHType(QualType T) override;
- virtual void GenerateCategory(const ObjCCategoryImplDecl *CMD);
+ void GenerateCategory(const ObjCCategoryImplDecl *CMD) override;
- virtual void GenerateClass(const ObjCImplementationDecl *ClassDecl);
+ void GenerateClass(const ObjCImplementationDecl *ClassDecl) override;
- virtual void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) {}
+ void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) override {}
- virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
- const ObjCProtocolDecl *PD);
+ llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
+ const ObjCProtocolDecl *PD) override;
- virtual llvm::Constant *GetPropertyGetFunction();
- virtual llvm::Constant *GetPropertySetFunction();
- virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
- bool copy);
- virtual llvm::Constant *GetGetStructFunction();
- virtual llvm::Constant *GetSetStructFunction();
- virtual llvm::Constant *GetCppAtomicObjectGetFunction();
- virtual llvm::Constant *GetCppAtomicObjectSetFunction();
- virtual llvm::Constant *EnumerationMutationFunction();
+ llvm::Constant *GetPropertyGetFunction() override;
+ llvm::Constant *GetPropertySetFunction() override;
+ llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
+ bool copy) override;
+ llvm::Constant *GetGetStructFunction() override;
+ llvm::Constant *GetSetStructFunction() override;
+ llvm::Constant *GetCppAtomicObjectGetFunction() override;
+ llvm::Constant *GetCppAtomicObjectSetFunction() override;
+ llvm::Constant *EnumerationMutationFunction() override;
- virtual void EmitTryStmt(CodeGen::CodeGenFunction &CGF,
- const ObjCAtTryStmt &S);
- virtual void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF,
- const ObjCAtSynchronizedStmt &S);
+ void EmitTryStmt(CodeGen::CodeGenFunction &CGF,
+ const ObjCAtTryStmt &S) override;
+ void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF,
+ const ObjCAtSynchronizedStmt &S) override;
void EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const Stmt &S);
- virtual void EmitThrowStmt(CodeGen::CodeGenFunction &CGF,
- const ObjCAtThrowStmt &S,
- bool ClearInsertionPoint=true);
- virtual llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF,
- llvm::Value *AddrWeakObj);
- virtual void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dst);
- virtual void EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dest,
- bool threadlocal = false);
- virtual void EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dest,
- llvm::Value *ivarOffset);
- virtual void EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dest);
- virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *dest, llvm::Value *src,
- llvm::Value *size);
+ void EmitThrowStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtThrowStmt &S,
+ bool ClearInsertionPoint=true) override;
+ llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *AddrWeakObj) override;
+ void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dst) override;
+ void EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dest,
+ bool threadlocal = false) override;
+ void EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dest,
+ llvm::Value *ivarOffset) override;
+ void EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dest) override;
+ void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *dest, llvm::Value *src,
+ llvm::Value *size) override;
- virtual LValue EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF,
- QualType ObjectTy,
- llvm::Value *BaseValue,
- const ObjCIvarDecl *Ivar,
- unsigned CVRQualifiers);
- virtual llvm::Value *EmitIvarOffset(CodeGen::CodeGenFunction &CGF,
- const ObjCInterfaceDecl *Interface,
- const ObjCIvarDecl *Ivar);
-
+ LValue EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF, QualType ObjectTy,
+ llvm::Value *BaseValue, const ObjCIvarDecl *Ivar,
+ unsigned CVRQualifiers) override;
+ llvm::Value *EmitIvarOffset(CodeGen::CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *Interface,
+ const ObjCIvarDecl *Ivar) override;
+
/// GetClassGlobal - Return the global variable for the Objective-C
/// class of the given name.
- virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name) {
+ llvm::GlobalVariable *GetClassGlobal(const std::string &Name,
+ bool Weak = false) override {
llvm_unreachable("CGObjCMac::GetClassGlobal");
}
};
@@ -1316,7 +1318,8 @@
llvm::Constant *IsAGV,
llvm::Constant *SuperClassGV,
llvm::Constant *ClassRoGV,
- bool HiddenVisibility);
+ bool HiddenVisibility,
+ bool Weak);
llvm::Constant *GetMethodConstant(const ObjCMethodDecl *MD);
@@ -1341,13 +1344,13 @@
/// GetOrEmitProtocol - Get the protocol object for the given
/// declaration, emitting it if necessary. The return value has type
/// ProtocolPtrTy.
- virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD);
+ llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) override;
/// GetOrEmitProtocolRef - Get a forward reference to the protocol
/// object for the given declaration, emitting it if needed. These
/// forward references will be filled in with empty bodies if no
/// definition is seen. The return value has type ProtocolPtrTy.
- virtual llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD);
+ llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) override;
/// EmitProtocolList - Generate the list of referenced
/// protocols. The return value has type ProtocolListPtrTy.
@@ -1367,17 +1370,18 @@
/// GetClassGlobal - Return the global variable for the Objective-C
/// class of the given name.
- llvm::GlobalVariable *GetClassGlobal(const std::string &Name);
-
+ llvm::GlobalVariable *GetClassGlobal(const std::string &Name,
+ bool Weak = false) override;
+
/// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy,
/// for the given class reference.
llvm::Value *EmitClassRef(CodeGenFunction &CGF,
const ObjCInterfaceDecl *ID);
llvm::Value *EmitClassRefFromId(CodeGenFunction &CGF,
- IdentifierInfo *II);
-
- llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF);
+ IdentifierInfo *II, bool Weak);
+
+ llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) override;
/// EmitSuperClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy,
/// for the given super class reference.
@@ -1434,11 +1438,10 @@
bool ImplementationIsNonLazy(const ObjCImplDecl *OD) const;
bool IsIvarOffsetKnownIdempotent(const CodeGen::CodeGenFunction &CGF,
- const ObjCInterfaceDecl *ID,
const ObjCIvarDecl *IV) {
- // Annotate the load as an invariant load iff the object type is the type,
- // or a derived type, of the class containing the ivar within an ObjC
- // method. This check is needed because the ivar offset is a lazily
+ // Annotate the load as an invariant load iff we are in an instance method
+ // and the ivar belongs to that method's class or one of its superclasses.
+ // This check is needed because the ivar offset is a lazily
// initialised value that may depend on objc_msgSend to perform a fixup on
// the first message dispatch.
//
@@ -1446,121 +1449,115 @@
// base of the ivar access is a parameter to an Objective C method.
// However, because the parameters are not available in the current
// interface, we cannot perform this check.
- if (CGF.CurFuncDecl && isa<ObjCMethodDecl>(CGF.CurFuncDecl))
- if (IV->getContainingInterface()->isSuperClassOf(ID))
- return true;
+ if (const ObjCMethodDecl *MD =
+ dyn_cast_or_null<ObjCMethodDecl>(CGF.CurFuncDecl))
+ if (MD->isInstanceMethod())
+ if (const ObjCInterfaceDecl *ID = MD->getClassInterface())
+ return IV->getContainingInterface()->isSuperClassOf(ID);
return false;
}
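The rewritten predicate above only trusts the invariant-load annotation when the enclosing function is an Objective-C instance method whose class is (or derives from) the ivar's containing interface. A plain-C++ analogue of that guard chain, using dynamic_cast in place of LLVM's dyn_cast_or_null (toy hierarchy, purely illustrative):

struct Interface {
  const Interface *Super = nullptr;
  // True if this interface is ID itself or one of ID's ancestors.
  bool isSuperClassOf(const Interface *ID) const {
    for (; ID; ID = ID->Super)
      if (ID == this)
        return true;
    return false;
  }
};

struct Decl { virtual ~Decl() {} };

struct MethodDecl : Decl {
  bool Instance = false;
  const Interface *Class = nullptr;
  bool isInstanceMethod() const { return Instance; }
  const Interface *getClassInterface() const { return Class; }
};

// Mirrors IsIvarOffsetKnownIdempotent: the offset load is treated as
// invariant only inside an instance method of the ivar's class or one of
// its subclasses.
bool offsetLoadIsInvariant(const Decl *CurFuncDecl, const Interface *IvarOwner) {
  if (const auto *MD = dynamic_cast<const MethodDecl *>(CurFuncDecl))
    if (MD->isInstanceMethod())
      if (const Interface *ID = MD->getClassInterface())
        return IvarOwner->isSuperClassOf(ID);
  return false;
}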
public:
CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm);
// FIXME. All stubs for now!
- virtual llvm::Function *ModuleInitFunction();
+ llvm::Function *ModuleInitFunction() override;
- virtual CodeGen::RValue GenerateMessageSend(CodeGen::CodeGenFunction &CGF,
- ReturnValueSlot Return,
- QualType ResultType,
- Selector Sel,
- llvm::Value *Receiver,
- const CallArgList &CallArgs,
- const ObjCInterfaceDecl *Class,
- const ObjCMethodDecl *Method);
+ CodeGen::RValue GenerateMessageSend(CodeGen::CodeGenFunction &CGF,
+ ReturnValueSlot Return,
+ QualType ResultType, Selector Sel,
+ llvm::Value *Receiver,
+ const CallArgList &CallArgs,
+ const ObjCInterfaceDecl *Class,
+ const ObjCMethodDecl *Method) override;
- virtual CodeGen::RValue
+ CodeGen::RValue
GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF,
- ReturnValueSlot Return,
- QualType ResultType,
- Selector Sel,
- const ObjCInterfaceDecl *Class,
- bool isCategoryImpl,
- llvm::Value *Receiver,
- bool IsClassMessage,
- const CallArgList &CallArgs,
- const ObjCMethodDecl *Method);
+ ReturnValueSlot Return, QualType ResultType,
+ Selector Sel, const ObjCInterfaceDecl *Class,
+ bool isCategoryImpl, llvm::Value *Receiver,
+ bool IsClassMessage, const CallArgList &CallArgs,
+ const ObjCMethodDecl *Method) override;
- virtual llvm::Value *GetClass(CodeGenFunction &CGF,
- const ObjCInterfaceDecl *ID);
+ llvm::Value *GetClass(CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *ID) override;
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
- bool lvalue = false)
+ llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
+ bool lvalue = false) override
{ return EmitSelector(CGF, Sel, lvalue); }
/// The NeXT/Apple runtimes do not support typed selectors; just emit an
/// untyped one.
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF,
- const ObjCMethodDecl *Method)
+ llvm::Value *GetSelector(CodeGenFunction &CGF,
+ const ObjCMethodDecl *Method) override
{ return EmitSelector(CGF, Method->getSelector()); }
- virtual void GenerateCategory(const ObjCCategoryImplDecl *CMD);
+ void GenerateCategory(const ObjCCategoryImplDecl *CMD) override;
- virtual void GenerateClass(const ObjCImplementationDecl *ClassDecl);
+ void GenerateClass(const ObjCImplementationDecl *ClassDecl) override;
- virtual void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) {}
+ void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) override {}
- virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
- const ObjCProtocolDecl *PD);
+ llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
+ const ObjCProtocolDecl *PD) override;
- virtual llvm::Constant *GetEHType(QualType T);
+ llvm::Constant *GetEHType(QualType T) override;
- virtual llvm::Constant *GetPropertyGetFunction() {
+ llvm::Constant *GetPropertyGetFunction() override {
return ObjCTypes.getGetPropertyFn();
}
- virtual llvm::Constant *GetPropertySetFunction() {
+ llvm::Constant *GetPropertySetFunction() override {
return ObjCTypes.getSetPropertyFn();
}
-
- virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
- bool copy) {
+
+ llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
+ bool copy) override {
return ObjCTypes.getOptimizedSetPropertyFn(atomic, copy);
}
-
- virtual llvm::Constant *GetSetStructFunction() {
+
+ llvm::Constant *GetSetStructFunction() override {
return ObjCTypes.getCopyStructFn();
}
- virtual llvm::Constant *GetGetStructFunction() {
+ llvm::Constant *GetGetStructFunction() override {
return ObjCTypes.getCopyStructFn();
}
- virtual llvm::Constant *GetCppAtomicObjectSetFunction() {
+ llvm::Constant *GetCppAtomicObjectSetFunction() override {
return ObjCTypes.getCppAtomicObjectFunction();
}
- virtual llvm::Constant *GetCppAtomicObjectGetFunction() {
+ llvm::Constant *GetCppAtomicObjectGetFunction() override {
return ObjCTypes.getCppAtomicObjectFunction();
}
-
- virtual llvm::Constant *EnumerationMutationFunction() {
+
+ llvm::Constant *EnumerationMutationFunction() override {
return ObjCTypes.getEnumerationMutationFn();
}
- virtual void EmitTryStmt(CodeGen::CodeGenFunction &CGF,
- const ObjCAtTryStmt &S);
- virtual void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF,
- const ObjCAtSynchronizedStmt &S);
- virtual void EmitThrowStmt(CodeGen::CodeGenFunction &CGF,
- const ObjCAtThrowStmt &S,
- bool ClearInsertionPoint=true);
- virtual llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF,
- llvm::Value *AddrWeakObj);
- virtual void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dst);
- virtual void EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dest,
- bool threadlocal = false);
- virtual void EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dest,
- llvm::Value *ivarOffset);
- virtual void EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF,
- llvm::Value *src, llvm::Value *dest);
- virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *dest, llvm::Value *src,
- llvm::Value *size);
- virtual LValue EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF,
- QualType ObjectTy,
- llvm::Value *BaseValue,
- const ObjCIvarDecl *Ivar,
- unsigned CVRQualifiers);
- virtual llvm::Value *EmitIvarOffset(CodeGen::CodeGenFunction &CGF,
- const ObjCInterfaceDecl *Interface,
- const ObjCIvarDecl *Ivar);
+ void EmitTryStmt(CodeGen::CodeGenFunction &CGF,
+ const ObjCAtTryStmt &S) override;
+ void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF,
+ const ObjCAtSynchronizedStmt &S) override;
+ void EmitThrowStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtThrowStmt &S,
+ bool ClearInsertionPoint=true) override;
+ llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *AddrWeakObj) override;
+ void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dst) override;
+ void EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dest,
+ bool threadlocal = false) override;
+ void EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dest,
+ llvm::Value *ivarOffset) override;
+ void EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *src, llvm::Value *dest) override;
+ void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *dest, llvm::Value *src,
+ llvm::Value *size) override;
+ LValue EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF, QualType ObjectTy,
+ llvm::Value *BaseValue, const ObjCIvarDecl *Ivar,
+ unsigned CVRQualifiers) override;
+ llvm::Value *EmitIvarOffset(CodeGen::CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *Interface,
+ const ObjCIvarDecl *Ivar) override;
};
/// A helper class for performing the null-initialization of a return
@@ -1878,14 +1875,14 @@
MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs);
if (Method)
- assert(CGM.getContext().getCanonicalType(Method->getResultType()) ==
- CGM.getContext().getCanonicalType(ResultType) &&
+ assert(CGM.getContext().getCanonicalType(Method->getReturnType()) ==
+ CGM.getContext().getCanonicalType(ResultType) &&
"Result type mismatch!");
NullReturnState nullReturn;
llvm::Constant *Fn = NULL;
- if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) {
+ if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) {
if (!IsSuper) nullReturn.init(CGF, Arg0);
Fn = (ObjCABI == 2) ? ObjCTypes.getSendStretFn2(IsSuper)
: ObjCTypes.getSendStretFn(IsSuper);
@@ -1896,15 +1893,17 @@
Fn = (ObjCABI == 2) ? ObjCTypes.getSendFp2RetFn2(IsSuper)
: ObjCTypes.getSendFp2retFn(IsSuper);
} else {
+ // arm64 uses objc_msgSend for stret methods, but the null-receiver check
+ // must still be made for it.
+ if (!IsSuper && CGM.ReturnTypeUsesSRet(MSI.CallInfo))
+ nullReturn.init(CGF, Arg0);
Fn = (ObjCABI == 2) ? ObjCTypes.getSendFn2(IsSuper)
: ObjCTypes.getSendFn(IsSuper);
}
bool requiresnullCheck = false;
if (CGM.getLangOpts().ObjCAutoRefCount && Method)
- for (ObjCMethodDecl::param_const_iterator i = Method->param_begin(),
- e = Method->param_end(); i != e; ++i) {
- const ParmVarDecl *ParamDecl = (*i);
+ for (const auto *ParamDecl : Method->params()) {
if (ParamDecl->hasAttr<NSConsumedAttr>()) {
if (!nullReturn.NullBB)
nullReturn.init(CGF, Arg0);
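The nil-receiver handling added above matters because a message to nil may leave an indirectly-returned struct unwritten, so the compiler has to give the result a well-defined (zeroed) value on the nil path itself. A small standalone C++ sketch of that idea, with an ordinary function pointer standing in for the message send (illustrative only, not the NullReturnState machinery):

#include <cstring>

struct Big { long A, B, C, D; };   // a struct returned indirectly (sret-style)

// If the callee may do nothing at all (nil receiver), zero-fill the result
// slot before deciding whether to call -- the same effect NullReturnState
// arranges with a branch in the code above.
Big callMaybeNil(Big (*Fn)(void *Self), void *Self) {
  Big Result;
  std::memset(&Result, 0, sizeof(Result));
  if (Self)
    Result = Fn(Self);
  return Result;
}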
@@ -1967,9 +1966,8 @@
// to be GC'ed.
// Walk the captured variables.
- for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(),
- ce = blockDecl->capture_end(); ci != ce; ++ci) {
- const VarDecl *variable = ci->getVariable();
+ for (const auto &CI : blockDecl->captures()) {
+ const VarDecl *variable = CI.getVariable();
QualType type = variable->getType();
const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
@@ -1980,7 +1978,7 @@
uint64_t fieldOffset = layout->getElementOffset(capture.getIndex());
// __block variables are passed by their descriptor address.
- if (ci->isByRef()) {
+ if (CI.isByRef()) {
IvarsInfo.push_back(GC_IVAR(fieldOffset, /*size in words*/ 1));
continue;
}
@@ -2118,9 +2116,6 @@
ElCount *= CArray->getSize().getZExtValue();
FQT = CArray->getElementType();
}
-
- assert(!FQT->isUnionType() &&
- "layout for array of unions not supported");
if (FQT->isRecordType() && ElCount) {
int OldIndex = RunSkipBlockVars.size() - 1;
const RecordType *RT = FQT->getAs<RecordType>();
@@ -2196,10 +2191,7 @@
bool &HasUnion,
bool ByrefLayout) {
const RecordDecl *RD = RT->getDecl();
- SmallVector<const FieldDecl*, 16> Fields;
- for (RecordDecl::field_iterator i = RD->field_begin(),
- e = RD->field_end(); i != e; ++i)
- Fields.push_back(*i);
+ SmallVector<const FieldDecl*, 16> Fields(RD->fields());
llvm::Type *Ty = CGM.getTypes().ConvertType(QualType(RT, 0));
const llvm::StructLayout *RecLayout =
CGM.getDataLayout().getStructLayout(cast<llvm::StructType>(Ty));
@@ -2486,9 +2478,8 @@
blockInfo.BlockHeaderForcedGapOffset,
blockInfo.BlockHeaderForcedGapSize);
// Walk the captured variables.
- for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(),
- ce = blockDecl->capture_end(); ci != ce; ++ci) {
- const VarDecl *variable = ci->getVariable();
+ for (const auto &CI : blockDecl->captures()) {
+ const VarDecl *variable = CI.getVariable();
QualType type = variable->getType();
const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
@@ -2500,17 +2491,17 @@
CharUnits::fromQuantity(layout->getElementOffset(capture.getIndex()));
assert(!type->isArrayType() && "array variable should not be caught");
- if (!ci->isByRef())
+ if (!CI.isByRef())
if (const RecordType *record = type->getAs<RecordType>()) {
BuildRCBlockVarRecordLayout(record, fieldOffset, hasUnion);
continue;
}
CharUnits fieldSize;
- if (ci->isByRef())
+ if (CI.isByRef())
fieldSize = CharUnits::fromQuantity(WordSizeInBytes);
else
fieldSize = CGM.getContext().getTypeSizeInChars(type);
- UpdateRunSkipBlockVars(ci->isByRef(), getBlockCaptureLifetime(type, false),
+ UpdateRunSkipBlockVars(CI.isByRef(), getBlockCaptureLifetime(type, false),
fieldOffset, fieldSize);
}
return getBitmapBlockLayout(false);
@@ -2562,8 +2553,16 @@
return GetOrEmitProtocolRef(PD);
}
+static void assertPrivateName(const llvm::GlobalValue *GV) {
+ StringRef NameRef = GV->getName();
+ (void)NameRef;
+ assert(NameRef[0] == '\01' && (NameRef[1] == 'L' || NameRef[1] == 'l'));
+ assert(GV->getVisibility() == llvm::GlobalValue::DefaultVisibility);
+ assert(GV->getLinkage() == llvm::GlobalValue::PrivateLinkage);
+}
+
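The new assertPrivateName helper only sanity-checks the naming and linkage conventions used throughout this file: a leading '\01' (which tells LLVM to emit the name as-is, without the usual mangling) followed by the 'L' or 'l' prefixes used for local Objective-C metadata symbols on Darwin, plus private linkage. A tiny standalone check of the same naming convention (std::string stand-in for StringRef, function name illustrative):

#include <string>

bool looksLikePrivateObjCSymbol(const std::string &Name) {
  return Name.size() >= 2 && Name[0] == '\01' &&
         (Name[1] == 'L' || Name[1] == 'l');
}

// e.g. looksLikePrivateObjCSymbol("\01L_OBJC_PROTOCOL_Foo") == true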
/*
-// APPLE LOCAL radar 4585769 - Objective-C 1.0 extensions
+// Objective-C 1.0 extensions
struct _objc_protocol {
struct _objc_protocol_extension *isa;
char *protocol_name;
@@ -2593,9 +2592,7 @@
std::vector<llvm::Constant*> InstanceMethods, ClassMethods;
std::vector<llvm::Constant*> OptInstanceMethods, OptClassMethods;
std::vector<llvm::Constant*> MethodTypesExt, OptMethodTypesExt;
- for (ObjCProtocolDecl::instmeth_iterator
- i = PD->instmeth_begin(), e = PD->instmeth_end(); i != e; ++i) {
- ObjCMethodDecl *MD = *i;
+ for (const auto *MD : PD->instance_methods()) {
llvm::Constant *C = GetMethodDescriptionConstant(MD);
if (!C)
return GetOrEmitProtocolRef(PD);
@@ -2609,9 +2606,7 @@
}
}
- for (ObjCProtocolDecl::classmeth_iterator
- i = PD->classmeth_begin(), e = PD->classmeth_end(); i != e; ++i) {
- ObjCMethodDecl *MD = *i;
+ for (const auto *MD : PD->class_methods()) {
llvm::Constant *C = GetMethodDescriptionConstant(MD);
if (!C)
return GetOrEmitProtocolRef(PD);
@@ -2646,13 +2641,13 @@
Values);
if (Entry) {
- // Already created, fix the linkage and update the initializer.
- Entry->setLinkage(llvm::GlobalValue::InternalLinkage);
+ // Already created, update the initializer.
+ assert(Entry->getLinkage() == llvm::GlobalValue::PrivateLinkage);
Entry->setInitializer(Init);
} else {
Entry =
new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolTy, false,
- llvm::GlobalValue::InternalLinkage,
+ llvm::GlobalValue::PrivateLinkage,
Init,
"\01L_OBJC_PROTOCOL_" + PD->getName());
Entry->setSection("__OBJC,__protocol,regular,no_dead_strip");
@@ -2661,7 +2656,8 @@
Protocols[PD->getIdentifier()] = Entry;
}
- CGM.AddUsedGlobal(Entry);
+ assertPrivateName(Entry);
+ CGM.addCompilerUsedGlobal(Entry);
return Entry;
}
@@ -2675,13 +2671,14 @@
// contents for protocols which were referenced but never defined.
Entry =
new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolTy, false,
- llvm::GlobalValue::ExternalLinkage,
+ llvm::GlobalValue::PrivateLinkage,
0,
"\01L_OBJC_PROTOCOL_" + PD->getName());
Entry->setSection("__OBJC,__protocol,regular,no_dead_strip");
// FIXME: Is this necessary? Why only for protocol?
Entry->setAlignment(4);
}
+ assertPrivateName(Entry);
return Entry;
}
@@ -2726,7 +2723,7 @@
llvm::ConstantStruct::get(ObjCTypes.ProtocolExtensionTy, Values);
// No special section, but goes in llvm.used
- return CreateMetadataVar("\01L_OBJC_PROTOCOLEXT_" + PD->getName(),
+ return CreateMetadataVar("\01l_OBJC_PROTOCOLEXT_" + PD->getName(),
Init,
0, 0, true);
}
@@ -2775,14 +2772,11 @@
PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet,
SmallVectorImpl<llvm::Constant *> &Properties,
const Decl *Container,
- const ObjCProtocolDecl *PROTO,
+ const ObjCProtocolDecl *Proto,
const ObjCCommonTypesHelper &ObjCTypes) {
- for (ObjCProtocolDecl::protocol_iterator P = PROTO->protocol_begin(),
- E = PROTO->protocol_end(); P != E; ++P)
- PushProtocolProperties(PropertySet, Properties, Container, (*P), ObjCTypes);
- for (ObjCContainerDecl::prop_iterator I = PROTO->prop_begin(),
- E = PROTO->prop_end(); I != E; ++I) {
- const ObjCPropertyDecl *PD = *I;
+ for (const auto *P : Proto->protocols())
+ PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes);
+ for (const auto *PD : Proto->properties()) {
if (!PropertySet.insert(PD->getIdentifier()))
continue;
llvm::Constant *Prop[] = {
@@ -2811,9 +2805,7 @@
const ObjCCommonTypesHelper &ObjCTypes) {
SmallVector<llvm::Constant *, 16> Properties;
llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet;
- for (ObjCContainerDecl::prop_iterator I = OCD->prop_begin(),
- E = OCD->prop_end(); I != E; ++I) {
- const ObjCPropertyDecl *PD = *I;
+ for (const auto *PD : OCD->properties()) {
PropertySet.insert(PD->getIdentifier());
llvm::Constant *Prop[] = {
GetPropertyName(PD->getIdentifier()),
@@ -2823,17 +2815,12 @@
Prop));
}
if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) {
- for (ObjCInterfaceDecl::all_protocol_iterator
- P = OID->all_referenced_protocol_begin(),
- E = OID->all_referenced_protocol_end(); P != E; ++P)
- PushProtocolProperties(PropertySet, Properties, Container, (*P),
- ObjCTypes);
+ for (const auto *P : OID->all_referenced_protocols())
+ PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes);
}
else if (const ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(OCD)) {
- for (ObjCCategoryDecl::protocol_iterator P = CD->protocol_begin(),
- E = CD->protocol_end(); P != E; ++P)
- PushProtocolProperties(PropertySet, Properties, Container, (*P),
- ObjCTypes);
+ for (const auto *P : CD->protocols())
+ PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes);
}
// Return null for empty list.
@@ -2945,16 +2932,13 @@
<< OCD->getName();
SmallVector<llvm::Constant *, 16> InstanceMethods, ClassMethods;
- for (ObjCCategoryImplDecl::instmeth_iterator
- i = OCD->instmeth_begin(), e = OCD->instmeth_end(); i != e; ++i) {
+ for (const auto *I : OCD->instance_methods())
// Instance methods should always be defined.
- InstanceMethods.push_back(GetMethodConstant(*i));
- }
- for (ObjCCategoryImplDecl::classmeth_iterator
- i = OCD->classmeth_begin(), e = OCD->classmeth_end(); i != e; ++i) {
+ InstanceMethods.push_back(GetMethodConstant(I));
+
+ for (const auto *I : OCD->class_methods())
// Class methods should always be defined.
- ClassMethods.push_back(GetMethodConstant(*i));
- }
+ ClassMethods.push_back(GetMethodConstant(I));
llvm::Constant *Values[7];
Values[0] = GetClassName(OCD->getIdentifier());
@@ -3073,21 +3057,15 @@
Flags |= FragileABI_Class_Hidden;
SmallVector<llvm::Constant *, 16> InstanceMethods, ClassMethods;
- for (ObjCImplementationDecl::instmeth_iterator
- i = ID->instmeth_begin(), e = ID->instmeth_end(); i != e; ++i) {
+ for (const auto *I : ID->instance_methods())
// Instance methods should always be defined.
- InstanceMethods.push_back(GetMethodConstant(*i));
- }
- for (ObjCImplementationDecl::classmeth_iterator
- i = ID->classmeth_begin(), e = ID->classmeth_end(); i != e; ++i) {
+ InstanceMethods.push_back(GetMethodConstant(I));
+
+ for (const auto *I : ID->class_methods())
// Class methods should always be defined.
- ClassMethods.push_back(GetMethodConstant(*i));
- }
+ ClassMethods.push_back(GetMethodConstant(I));
- for (ObjCImplementationDecl::propimpl_iterator
- i = ID->propimpl_begin(), e = ID->propimpl_end(); i != e; ++i) {
- ObjCPropertyImplDecl *PID = *i;
-
+ for (const auto *PID : ID->property_impls()) {
if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) {
ObjCPropertyDecl *PD = PID->getPropertyDecl();
@@ -3133,19 +3111,19 @@
Name += ClassName;
const char *Section = "__OBJC,__class,regular,no_dead_strip";
// Check for a forward reference.
- llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name);
+ llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true);
if (GV) {
assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
"Forward metaclass reference has incorrect type.");
- GV->setLinkage(llvm::GlobalValue::InternalLinkage);
GV->setInitializer(Init);
GV->setSection(Section);
GV->setAlignment(4);
- CGM.AddUsedGlobal(GV);
- }
- else
+ CGM.addCompilerUsedGlobal(GV);
+ } else
GV = CreateMetadataVar(Name, Init, Section, 4, true);
+ assertPrivateName(GV);
DefinedClasses.push_back(GV);
+ ImplementedClasses.push_back(Interface);
// method definition entries must be clear for next implementation.
MethodDefinitions.clear();
}
@@ -3201,20 +3179,20 @@
Name += ID->getName();
// Check for a forward reference.
- llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name);
+ llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true);
if (GV) {
assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
"Forward metaclass reference has incorrect type.");
- GV->setLinkage(llvm::GlobalValue::InternalLinkage);
GV->setInitializer(Init);
} else {
GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false,
- llvm::GlobalValue::InternalLinkage,
+ llvm::GlobalValue::PrivateLinkage,
Init, Name);
}
+ assertPrivateName(GV);
GV->setSection("__OBJC,__meta_class,regular,no_dead_strip");
GV->setAlignment(4);
- CGM.AddUsedGlobal(GV);
+ CGM.addCompilerUsedGlobal(GV);
return GV;
}
@@ -3230,35 +3208,29 @@
// Check for an existing forward reference.
// Previously, metaclass with internal linkage may have been defined.
// pass 'true' as 2nd argument so it is returned.
- if (llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name,
- true)) {
- assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
- "Forward metaclass reference has incorrect type.");
- return GV;
- } else {
- // Generate as an external reference to keep a consistent
- // module. This will be patched up when we emit the metaclass.
- return new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false,
- llvm::GlobalValue::ExternalLinkage,
- 0,
- Name);
- }
+ llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true);
+ if (!GV)
+ GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false,
+ llvm::GlobalValue::PrivateLinkage, 0, Name);
+
+ assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
+ "Forward metaclass reference has incorrect type.");
+ assertPrivateName(GV);
+ return GV;
}
llvm::Value *CGObjCMac::EmitSuperClassRef(const ObjCInterfaceDecl *ID) {
std::string Name = "\01L_OBJC_CLASS_" + ID->getNameAsString();
-
- if (llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name,
- true)) {
- assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
- "Forward class metadata reference has incorrect type.");
- return GV;
- } else {
- return new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false,
- llvm::GlobalValue::ExternalLinkage,
- 0,
- Name);
- }
+ llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true);
+
+ if (!GV)
+ GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false,
+ llvm::GlobalValue::PrivateLinkage, 0, Name);
+
+ assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
+ "Forward class metadata reference has incorrect type.");
+ assertPrivateName(GV);
+ return GV;
}
/*
@@ -3430,13 +3402,14 @@
llvm::Type *Ty = Init->getType();
llvm::GlobalVariable *GV =
new llvm::GlobalVariable(CGM.getModule(), Ty, false,
- llvm::GlobalValue::InternalLinkage, Init, Name);
+ llvm::GlobalValue::PrivateLinkage, Init, Name);
+ assertPrivateName(GV);
if (Section)
GV->setSection(Section);
if (Align)
GV->setAlignment(Align);
if (AddToUsed)
- CGM.AddUsedGlobal(GV);
+ CGM.addCompilerUsedGlobal(GV);
return GV;
}
@@ -3501,7 +3474,7 @@
: S(*S), SyncArgSlot(SyncArgSlot), CallTryExitVar(CallTryExitVar),
ExceptionData(ExceptionData), ObjCTypes(*ObjCTypes) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
// Check whether we need to call objc_exception_try_exit.
// In optimized code, this branch will always be folded.
llvm::BasicBlock *FinallyCallExit =
@@ -4293,14 +4266,14 @@
/// unsigned flags;
/// };
enum ImageInfoFlags {
- eImageInfo_FixAndContinue = (1 << 0),
+ eImageInfo_FixAndContinue = (1 << 0), // This flag is no longer set by clang.
eImageInfo_GarbageCollected = (1 << 1),
eImageInfo_GCOnly = (1 << 2),
- eImageInfo_OptimizedByDyld = (1 << 3), // FIXME: When is this set.
+ eImageInfo_OptimizedByDyld = (1 << 3), // This flag is set by the dyld shared cache.
// A flag indicating that the module has no instances of a @synthesize of a
// superclass variable. <rdar://problem/6803242>
- eImageInfo_CorrectedSynthesize = (1 << 4),
+ eImageInfo_CorrectedSynthesize = (1 << 4), // This flag is no longer set by clang.
eImageInfo_ImageIsSimulated = (1 << 5)
};
@@ -4399,9 +4372,17 @@
// The runtime expects exactly the list of defined classes followed
// by the list of defined categories, in a single array.
SmallVector<llvm::Constant*, 8> Symbols(NumClasses + NumCategories);
- for (unsigned i=0; i<NumClasses; i++)
+ for (unsigned i=0; i<NumClasses; i++) {
+ const ObjCInterfaceDecl *ID = ImplementedClasses[i];
+ assert(ID);
+ if (ObjCImplementationDecl *IMP = ID->getImplementation())
+ // We are implementing a weak imported interface. Give it external linkage
+ if (ID->isWeakImported() && !IMP->isWeakImported())
+ DefinedClasses[i]->setLinkage(llvm::GlobalVariable::ExternalLinkage);
+
Symbols[i] = llvm::ConstantExpr::getBitCast(DefinedClasses[i],
ObjCTypes.Int8PtrTy);
+ }
for (unsigned i=0; i<NumCategories; i++)
Symbols[NumClasses + i] =
llvm::ConstantExpr::getBitCast(DefinedCategories[i],
@@ -4507,10 +4488,7 @@
bool &HasUnion) {
const RecordDecl *RD = RT->getDecl();
// FIXME - Use iterator.
- SmallVector<const FieldDecl*, 16> Fields;
- for (RecordDecl::field_iterator i = RD->field_begin(),
- e = RD->field_end(); i != e; ++i)
- Fields.push_back(*i);
+ SmallVector<const FieldDecl*, 16> Fields(RD->fields());
llvm::Type *Ty = CGM.getTypes().ConvertType(QualType(RT, 0));
const llvm::StructLayout *RecLayout =
CGM.getDataLayout().getStructLayout(cast<llvm::StructType>(Ty));
@@ -4589,9 +4567,6 @@
ElCount *= CArray->getSize().getZExtValue();
FQT = CArray->getElementType();
}
-
- assert(!FQT->isUnionType() &&
- "layout for array of unions not supported");
if (FQT->isRecordType() && ElCount) {
int OldIndex = IvarsInfo.size() - 1;
int OldSkIndex = SkipIvars.size() -1;
@@ -4996,10 +4971,10 @@
Values[2] = llvm::Constant::getNullValue(ObjCTypes.ProtocolListPtrTy);
Values[3] = Values[4] =
llvm::Constant::getNullValue(ObjCTypes.MethodDescriptionListPtrTy);
- I->second->setLinkage(llvm::GlobalValue::InternalLinkage);
+ assertPrivateName(I->second);
I->second->setInitializer(llvm::ConstantStruct::get(ObjCTypes.ProtocolTy,
Values));
- CGM.AddUsedGlobal(I->second);
+ CGM.addCompilerUsedGlobal(I->second);
}
// Add assembler directives to add lazy undefined symbol references
@@ -5054,6 +5029,13 @@
Int8PtrTy = CGM.Int8PtrTy;
Int8PtrPtrTy = CGM.Int8PtrPtrTy;
+ // arm64 targets use "int" ivar offset variables. All others,
+ // including OS X x86_64 and Windows x86_64, use "long" ivar offsets.
+ if (CGM.getTarget().getTriple().getArch() == llvm::Triple::arm64)
+ IvarOffsetVarTy = IntTy;
+ else
+ IvarOffsetVarTy = LongTy;
+
ObjectPtrTy = Types.ConvertType(Ctx.getObjCIdType());
PtrObjectPtrTy = llvm::PointerType::getUnqual(ObjectPtrTy);
SelectorPtrTy = Types.ConvertType(Ctx.getObjCSelType());
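The arm64 special case a few lines above boils down to a per-target choice of the C type behind ivar-offset variables. A one-function sketch of that selection using llvm::Triple, as this era of the code does (helper name is illustrative):

#include "llvm/ADT/Triple.h"

// "int"-sized ivar offset variables on arm64, "long"-sized everywhere else.
static const char *ivarOffsetVarCType(const llvm::Triple &T) {
  return T.getArch() == llvm::Triple::arm64 ? "int" : "long";
}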
@@ -5357,16 +5339,15 @@
ProtocolListnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolListnfABITy);
// struct _ivar_t {
- // unsigned long int *offset; // pointer to ivar offset location
+ // unsigned [long] int *offset; // pointer to ivar offset location
// char *name;
// char *type;
// uint32_t alignment;
// uint32_t size;
// }
- IvarnfABITy =
- llvm::StructType::create("struct._ivar_t",
- llvm::PointerType::getUnqual(LongTy),
- Int8PtrTy, Int8PtrTy, IntTy, IntTy, NULL);
+ IvarnfABITy = llvm::StructType::create(
+ "struct._ivar_t", llvm::PointerType::getUnqual(IvarOffsetVarTy),
+ Int8PtrTy, Int8PtrTy, IntTy, IntTy, NULL);
// struct _ivar_list_t {
// uint32 entsize; // sizeof(struct _ivar_t)
@@ -5521,12 +5502,13 @@
llvm::GlobalVariable *GV =
new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false,
- llvm::GlobalValue::InternalLinkage,
+ llvm::GlobalValue::PrivateLinkage,
Init,
SymbolName);
+ assertPrivateName(GV);
GV->setAlignment(CGM.getDataLayout().getABITypeAlignment(Init->getType()));
GV->setSection(SectionName);
- CGM.AddUsedGlobal(GV);
+ CGM.addCompilerUsedGlobal(GV);
}
void CGObjCNonFragileABIMac::FinishNonFragileABIModule() {
@@ -5534,24 +5516,20 @@
// Build list of all implemented class addresses in array
// L_OBJC_LABEL_CLASS_$.
+
+ for (unsigned i=0, NumClasses=ImplementedClasses.size(); i<NumClasses; i++) {
+ const ObjCInterfaceDecl *ID = ImplementedClasses[i];
+ assert(ID);
+ if (ObjCImplementationDecl *IMP = ID->getImplementation())
+ // We are implementing a weak-imported interface. Give it external linkage.
+ if (ID->isWeakImported() && !IMP->isWeakImported())
+ DefinedClasses[i]->setLinkage(llvm::GlobalVariable::ExternalLinkage);
+ }
+
AddModuleClassList(DefinedClasses,
"\01L_OBJC_LABEL_CLASS_$",
"__DATA, __objc_classlist, regular, no_dead_strip");
-
- for (unsigned i = 0, e = DefinedClasses.size(); i < e; i++) {
- llvm::GlobalValue *IMPLGV = DefinedClasses[i];
- if (IMPLGV->getLinkage() != llvm::GlobalValue::ExternalWeakLinkage)
- continue;
- IMPLGV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- }
-
- for (unsigned i = 0, e = DefinedMetaClasses.size(); i < e; i++) {
- llvm::GlobalValue *IMPLGV = DefinedMetaClasses[i];
- if (IMPLGV->getLinkage() != llvm::GlobalValue::ExternalWeakLinkage)
- continue;
- IMPLGV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- }
-
+
AddModuleClassList(DefinedNonLazyClasses,
"\01L_OBJC_LABEL_NONLAZY_CLASS_$",
"__DATA, __objc_nlclslist, regular, no_dead_strip");
@@ -5669,22 +5647,16 @@
std::string MethodListName("\01l_OBJC_$_");
if (flags & NonFragileABI_Class_Meta) {
MethodListName += "CLASS_METHODS_" + ID->getNameAsString();
- for (ObjCImplementationDecl::classmeth_iterator
- i = ID->classmeth_begin(), e = ID->classmeth_end(); i != e; ++i) {
+ for (const auto *I : ID->class_methods())
// Class methods should always be defined.
- Methods.push_back(GetMethodConstant(*i));
- }
+ Methods.push_back(GetMethodConstant(I));
} else {
MethodListName += "INSTANCE_METHODS_" + ID->getNameAsString();
- for (ObjCImplementationDecl::instmeth_iterator
- i = ID->instmeth_begin(), e = ID->instmeth_end(); i != e; ++i) {
+ for (const auto *I : ID->instance_methods())
// Instance methods should always be defined.
- Methods.push_back(GetMethodConstant(*i));
- }
- for (ObjCImplementationDecl::propimpl_iterator
- i = ID->propimpl_begin(), e = ID->propimpl_end(); i != e; ++i) {
- ObjCPropertyImplDecl *PID = *i;
+ Methods.push_back(GetMethodConstant(I));
+ for (const auto *PID : ID->property_impls()) {
if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize){
ObjCPropertyDecl *PD = PID->getPropertyDecl();
@@ -5721,11 +5693,12 @@
Values);
llvm::GlobalVariable *CLASS_RO_GV =
new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassRonfABITy, false,
- llvm::GlobalValue::InternalLinkage,
+ llvm::GlobalValue::PrivateLinkage,
Init,
(flags & NonFragileABI_Class_Meta) ?
std::string("\01l_OBJC_METACLASS_RO_$_")+ClassName :
std::string("\01l_OBJC_CLASS_RO_$_")+ClassName);
+ assertPrivateName(CLASS_RO_GV);
CLASS_RO_GV->setAlignment(
CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassRonfABITy));
CLASS_RO_GV->setSection("__DATA, __objc_const");
@@ -5743,12 +5716,9 @@
/// struct class_ro_t *ro;
/// }
///
-llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassMetaData(
- std::string &ClassName,
- llvm::Constant *IsAGV,
- llvm::Constant *SuperClassGV,
- llvm::Constant *ClassRoGV,
- bool HiddenVisibility) {
+llvm::GlobalVariable *CGObjCNonFragileABIMac::BuildClassMetaData(
+ std::string &ClassName, llvm::Constant *IsAGV, llvm::Constant *SuperClassGV,
+ llvm::Constant *ClassRoGV, bool HiddenVisibility, bool Weak) {
llvm::Constant *Values[] = {
IsAGV,
SuperClassGV,
@@ -5763,7 +5733,7 @@
llvm::PointerType::getUnqual(ObjCTypes.ImpnfABITy));
llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassnfABITy,
Values);
- llvm::GlobalVariable *GV = GetClassGlobal(ClassName);
+ llvm::GlobalVariable *GV = GetClassGlobal(ClassName, Weak);
GV->setInitializer(Init);
GV->setSection("__DATA, __objc_data");
GV->setAlignment(
@@ -5849,31 +5819,37 @@
if (!ID->getClassInterface()->getSuperClass()) {
// class is root
flags |= NonFragileABI_Class_Root;
- SuperClassGV = GetClassGlobal(ObjCClassName + ClassName);
- IsAGV = GetClassGlobal(ObjCMetaClassName + ClassName);
+ SuperClassGV = GetClassGlobal(ObjCClassName + ClassName,
+ ID->getClassInterface()->isWeakImported());
+ IsAGV = GetClassGlobal(ObjCMetaClassName + ClassName,
+ ID->getClassInterface()->isWeakImported());
+
+ // We are implementing a weak imported interface. Give it external
+ // linkage.
+ if (!ID->isWeakImported() && ID->getClassInterface()->isWeakImported())
+ IsAGV->setLinkage(llvm::GlobalVariable::ExternalLinkage);
} else {
// Has a root. Current class is not a root.
const ObjCInterfaceDecl *Root = ID->getClassInterface();
while (const ObjCInterfaceDecl *Super = Root->getSuperClass())
Root = Super;
- IsAGV = GetClassGlobal(ObjCMetaClassName + Root->getNameAsString());
- if (Root->isWeakImported())
- IsAGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
+ IsAGV = GetClassGlobal(ObjCMetaClassName + Root->getNameAsString(),
+ Root->isWeakImported());
// work on super class metadata symbol.
std::string SuperClassName =
ObjCMetaClassName +
ID->getClassInterface()->getSuperClass()->getNameAsString();
- SuperClassGV = GetClassGlobal(SuperClassName);
- if (ID->getClassInterface()->getSuperClass()->isWeakImported())
- SuperClassGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
+ SuperClassGV = GetClassGlobal(
+ SuperClassName,
+ ID->getClassInterface()->getSuperClass()->isWeakImported());
}
llvm::GlobalVariable *CLASS_RO_GV = BuildClassRoTInitializer(flags,
InstanceStart,
InstanceSize,ID);
std::string TClassName = ObjCMetaClassName + ClassName;
- llvm::GlobalVariable *MetaTClass =
- BuildClassMetaData(TClassName, IsAGV, SuperClassGV, CLASS_RO_GV,
- classIsHidden);
+ llvm::GlobalVariable *MetaTClass = BuildClassMetaData(
+ TClassName, IsAGV, SuperClassGV, CLASS_RO_GV, classIsHidden,
+ ID->isWeakImported());
DefinedMetaClasses.push_back(MetaTClass);
// Metadata for the class
@@ -5904,9 +5880,9 @@
// Has a root. Current class is not a root.
std::string RootClassName =
ID->getClassInterface()->getSuperClass()->getNameAsString();
- SuperClassGV = GetClassGlobal(ObjCClassName + RootClassName);
- if (ID->getClassInterface()->getSuperClass()->isWeakImported())
- SuperClassGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
+ SuperClassGV = GetClassGlobal(
+ ObjCClassName + RootClassName,
+ ID->getClassInterface()->getSuperClass()->isWeakImported());
}
GetClassSizeInfo(ID, InstanceStart, InstanceSize);
CLASS_RO_GV = BuildClassRoTInitializer(flags,
@@ -5917,8 +5893,10 @@
TClassName = ObjCClassName + ClassName;
llvm::GlobalVariable *ClassMD =
BuildClassMetaData(TClassName, MetaTClass, SuperClassGV, CLASS_RO_GV,
- classIsHidden);
+ classIsHidden,
+ ID->getClassInterface()->isWeakImported());
DefinedClasses.push_back(ClassMD);
+ ImplementedClasses.push_back(ID->getClassInterface());
// Determine if this class is also "non-lazy".
if (ImplementationIsNonLazy(ID))
@@ -5963,7 +5941,7 @@
ProtocolName);
PTGV->setSection("__DATA, __objc_protorefs, coalesced, no_dead_strip");
PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.AddUsedGlobal(PTGV);
+ CGM.addCompilerUsedGlobal(PTGV);
return CGF.Builder.CreateLoad(PTGV);
}
@@ -5988,21 +5966,18 @@
llvm::Constant *Values[6];
Values[0] = GetClassName(OCD->getIdentifier());
// meta-class entry symbol
- llvm::GlobalVariable *ClassGV = GetClassGlobal(ExtClassName);
- if (Interface->isWeakImported())
- ClassGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
-
+ llvm::GlobalVariable *ClassGV =
+ GetClassGlobal(ExtClassName, Interface->isWeakImported());
+
Values[1] = ClassGV;
std::vector<llvm::Constant*> Methods;
std::string MethodListName(Prefix);
MethodListName += "INSTANCE_METHODS_" + Interface->getNameAsString() +
"_$_" + OCD->getNameAsString();
- for (ObjCCategoryImplDecl::instmeth_iterator
- i = OCD->instmeth_begin(), e = OCD->instmeth_end(); i != e; ++i) {
+ for (const auto *I : OCD->instance_methods())
// Instance methods should always be defined.
- Methods.push_back(GetMethodConstant(*i));
- }
+ Methods.push_back(GetMethodConstant(I));
Values[2] = EmitMethodList(MethodListName,
"__DATA, __objc_const",
@@ -6012,11 +5987,9 @@
MethodListName += "CLASS_METHODS_" + Interface->getNameAsString() + "_$_" +
OCD->getNameAsString();
Methods.clear();
- for (ObjCCategoryImplDecl::classmeth_iterator
- i = OCD->classmeth_begin(), e = OCD->classmeth_end(); i != e; ++i) {
+ for (const auto *I : OCD->class_methods())
// Class methods should always be defined.
- Methods.push_back(GetMethodConstant(*i));
- }
+ Methods.push_back(GetMethodConstant(I));
Values[3] = EmitMethodList(MethodListName,
"__DATA, __objc_const",
@@ -6045,13 +6018,14 @@
llvm::GlobalVariable *GCATV
= new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.CategorynfABITy,
false,
- llvm::GlobalValue::InternalLinkage,
+ llvm::GlobalValue::PrivateLinkage,
Init,
ExtCatName);
+ assertPrivateName(GCATV);
GCATV->setAlignment(
CGM.getDataLayout().getABITypeAlignment(ObjCTypes.CategorynfABITy));
GCATV->setSection("__DATA, __objc_const");
- CGM.AddUsedGlobal(GCATV);
+ CGM.addCompilerUsedGlobal(GCATV);
DefinedCategories.push_back(GCATV);
// Determine if this category is also "non-lazy".
@@ -6107,10 +6081,11 @@
llvm::GlobalVariable *GV =
new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false,
- llvm::GlobalValue::InternalLinkage, Init, Name);
+ llvm::GlobalValue::PrivateLinkage, Init, Name);
+ assertPrivateName(GV);
GV->setAlignment(CGM.getDataLayout().getABITypeAlignment(Init->getType()));
GV->setSection(Section);
- CGM.AddUsedGlobal(GV);
+ CGM.addCompilerUsedGlobal(GV);
return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.MethodListnfABIPtrTy);
}
@@ -6125,12 +6100,9 @@
llvm::GlobalVariable *IvarOffsetGV =
CGM.getModule().getGlobalVariable(Name);
if (!IvarOffsetGV)
- IvarOffsetGV =
- new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.LongTy,
- false,
- llvm::GlobalValue::ExternalLinkage,
- 0,
- Name);
+ IvarOffsetGV = new llvm::GlobalVariable(
+ CGM.getModule(), ObjCTypes.IvarOffsetVarTy, false,
+ llvm::GlobalValue::ExternalLinkage, 0, Name);
return IvarOffsetGV;
}
@@ -6139,10 +6111,10 @@
const ObjCIvarDecl *Ivar,
unsigned long int Offset) {
llvm::GlobalVariable *IvarOffsetGV = ObjCIvarOffsetVariable(ID, Ivar);
- IvarOffsetGV->setInitializer(llvm::ConstantInt::get(ObjCTypes.LongTy,
- Offset));
+ IvarOffsetGV->setInitializer(
+ llvm::ConstantInt::get(ObjCTypes.IvarOffsetVarTy, Offset));
IvarOffsetGV->setAlignment(
- CGM.getDataLayout().getABITypeAlignment(ObjCTypes.LongTy));
+ CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy));
// FIXME: This matches gcc, but shouldn't the visibility be set on the use as
// well (i.e., in ObjCIvarOffsetVariable).
@@ -6160,7 +6132,7 @@
/// implementation. The return value has type
/// IvarListnfABIPtrTy.
/// struct _ivar_t {
-/// unsigned long int *offset; // pointer to ivar offset location
+/// unsigned [long] int *offset; // pointer to ivar offset location
/// char *name;
/// char *type;
/// uint32_t alignment;
@@ -6223,14 +6195,15 @@
const char *Prefix = "\01l_OBJC_$_INSTANCE_VARIABLES_";
llvm::GlobalVariable *GV =
new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false,
- llvm::GlobalValue::InternalLinkage,
+ llvm::GlobalValue::PrivateLinkage,
Init,
Prefix + OID->getName());
+ assertPrivateName(GV);
GV->setAlignment(
CGM.getDataLayout().getABITypeAlignment(Init->getType()));
GV->setSection("__DATA, __objc_const");
- CGM.AddUsedGlobal(GV);
+ CGM.addCompilerUsedGlobal(GV);
return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.IvarListnfABIPtrTy);
}
@@ -6243,10 +6216,9 @@
// reference or not. At module finalization we add the empty
// contents for protocols which were referenced but never defined.
Entry =
- new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolnfABITy, false,
- llvm::GlobalValue::ExternalLinkage,
- 0,
- "\01l_OBJC_PROTOCOL_$_" + PD->getName());
+ new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolnfABITy,
+ false, llvm::GlobalValue::WeakAnyLinkage, 0,
+ "\01l_OBJC_PROTOCOL_$_" + PD->getName());
Entry->setSection("__DATA,__datacoal_nt,coalesced");
}
@@ -6287,9 +6259,7 @@
std::vector<llvm::Constant*> InstanceMethods, ClassMethods;
std::vector<llvm::Constant*> OptInstanceMethods, OptClassMethods;
std::vector<llvm::Constant*> MethodTypesExt, OptMethodTypesExt;
- for (ObjCProtocolDecl::instmeth_iterator
- i = PD->instmeth_begin(), e = PD->instmeth_end(); i != e; ++i) {
- ObjCMethodDecl *MD = *i;
+ for (const auto *MD : PD->instance_methods()) {
llvm::Constant *C = GetMethodDescriptionConstant(MD);
if (!C)
return GetOrEmitProtocolRef(PD);
@@ -6303,9 +6273,7 @@
}
}
- for (ObjCProtocolDecl::classmeth_iterator
- i = PD->classmeth_begin(), e = PD->classmeth_end(); i != e; ++i) {
- ObjCMethodDecl *MD = *i;
+ for (const auto *MD : PD->class_methods()) {
llvm::Constant *C = GetMethodDescriptionConstant(MD);
if (!C)
return GetOrEmitProtocolRef(PD);
@@ -6359,8 +6327,8 @@
Values);
if (Entry) {
- // Already created, fix the linkage and update the initializer.
- Entry->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+ // Already created, update the initializer.
+ assert(Entry->getLinkage() == llvm::GlobalValue::WeakAnyLinkage);
Entry->setInitializer(Init);
} else {
Entry =
@@ -6374,7 +6342,7 @@
Protocols[PD->getIdentifier()] = Entry;
}
Entry->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.AddUsedGlobal(Entry);
+ CGM.addCompilerUsedGlobal(Entry);
// Use this protocol meta-data to build protocol list table in section
// __DATA, __objc_protolist
@@ -6386,7 +6354,7 @@
CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy));
PTGV->setSection("__DATA, __objc_protolist, coalesced, no_dead_strip");
PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.AddUsedGlobal(PTGV);
+ CGM.addCompilerUsedGlobal(PTGV);
return Entry;
}
@@ -6433,12 +6401,13 @@
llvm::Constant *Init = llvm::ConstantStruct::getAnon(Values);
GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false,
- llvm::GlobalValue::InternalLinkage,
+ llvm::GlobalValue::PrivateLinkage,
Init, Name);
+ assertPrivateName(GV);
GV->setSection("__DATA, __objc_const");
GV->setAlignment(
CGM.getDataLayout().getABITypeAlignment(Init->getType()));
- CGM.AddUsedGlobal(GV);
+ CGM.addCompilerUsedGlobal(GV);
return llvm::ConstantExpr::getBitCast(GV,
ObjCTypes.ProtocolListnfABIPtrTy);
}
@@ -6479,12 +6448,6 @@
unsigned CVRQualifiers) {
ObjCInterfaceDecl *ID = ObjectTy->getAs<ObjCObjectType>()->getInterface();
llvm::Value *Offset = EmitIvarOffset(CGF, ID, Ivar);
-
- if (IsIvarOffsetKnownIdempotent(CGF, ID, Ivar))
- if (llvm::LoadInst *LI = cast<llvm::LoadInst>(Offset))
- LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"),
- llvm::MDNode::get(VMContext, ArrayRef<llvm::Value*>()));
-
return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers,
Offset);
}
@@ -6493,7 +6456,20 @@
CodeGen::CodeGenFunction &CGF,
const ObjCInterfaceDecl *Interface,
const ObjCIvarDecl *Ivar) {
- return CGF.Builder.CreateLoad(ObjCIvarOffsetVariable(Interface, Ivar),"ivar");
+ llvm::Value *IvarOffsetValue = ObjCIvarOffsetVariable(Interface, Ivar);
+ IvarOffsetValue = CGF.Builder.CreateLoad(IvarOffsetValue, "ivar");
+ if (IsIvarOffsetKnownIdempotent(CGF, Ivar))
+ cast<llvm::LoadInst>(IvarOffsetValue)
+ ->setMetadata(CGM.getModule().getMDKindID("invariant.load"),
+ llvm::MDNode::get(VMContext, ArrayRef<llvm::Value *>()));
+
+  // The ivar offset may be a 32-bit or a 64-bit integer depending on the
+  // architecture. If it is a 32-bit value, cast it to a 64-bit integer, as
+  // that is what the caller always expects.
+ if (ObjCTypes.IvarOffsetVarTy == ObjCTypes.IntTy)
+ IvarOffsetValue = CGF.Builder.CreateIntCast(
+ IvarOffsetValue, ObjCTypes.LongTy, true, "ivar.conv");
+ return IvarOffsetValue;
}
static void appendSelectorForMessageRefTable(std::string &buffer,
@@ -6556,7 +6532,7 @@
// FIXME: don't use this for that.
llvm::Constant *fn = 0;
std::string messageRefName("\01l_");
- if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) {
+ if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) {
if (isSuper) {
fn = ObjCTypes.getMessageSendSuper2StretFixupFn();
messageRefName += "objc_msgSendSuper2_stret_fixup";
@@ -6603,9 +6579,7 @@
bool requiresnullCheck = false;
if (CGM.getLangOpts().ObjCAutoRefCount && method)
- for (ObjCMethodDecl::param_const_iterator i = method->param_begin(),
- e = method->param_end(); i != e; ++i) {
- const ParmVarDecl *ParamDecl = (*i);
+ for (const auto *ParamDecl : method->params()) {
if (ParamDecl->hasAttr<NSConsumedAttr>()) {
if (!nullReturn.NullBB)
nullReturn.init(CGF, arg0);
@@ -6652,49 +6626,53 @@
}
llvm::GlobalVariable *
-CGObjCNonFragileABIMac::GetClassGlobal(const std::string &Name) {
+CGObjCNonFragileABIMac::GetClassGlobal(const std::string &Name, bool Weak) {
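+  // extern_weak lets the reference resolve to null when a weakly imported
+  // class is missing at run time.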
+ llvm::GlobalValue::LinkageTypes L =
+ Weak ? llvm::GlobalValue::ExternalWeakLinkage
+ : llvm::GlobalValue::ExternalLinkage;
+
llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name);
- if (!GV) {
+ if (!GV)
GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABITy,
- false, llvm::GlobalValue::ExternalLinkage,
- 0, Name);
- }
+ false, L, 0, Name);
+ assert(GV->getLinkage() == L);
return GV;
}
llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF,
- IdentifierInfo *II) {
+ IdentifierInfo *II,
+ bool Weak) {
llvm::GlobalVariable *&Entry = ClassReferences[II];
if (!Entry) {
std::string ClassName(getClassSymbolPrefix() + II->getName().str());
- llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName);
+ llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName, Weak);
Entry =
new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy,
- false, llvm::GlobalValue::InternalLinkage,
+ false, llvm::GlobalValue::PrivateLinkage,
ClassGV,
"\01L_OBJC_CLASSLIST_REFERENCES_$_");
Entry->setAlignment(
CGM.getDataLayout().getABITypeAlignment(
ObjCTypes.ClassnfABIPtrTy));
Entry->setSection("__DATA, __objc_classrefs, regular, no_dead_strip");
- CGM.AddUsedGlobal(Entry);
+ CGM.addCompilerUsedGlobal(Entry);
}
-
+ assertPrivateName(Entry);
return CGF.Builder.CreateLoad(Entry);
}
llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CodeGenFunction &CGF,
const ObjCInterfaceDecl *ID) {
- return EmitClassRefFromId(CGF, ID->getIdentifier());
+ return EmitClassRefFromId(CGF, ID->getIdentifier(), ID->isWeakImported());
}
llvm::Value *CGObjCNonFragileABIMac::EmitNSAutoreleasePoolClassRef(
CodeGenFunction &CGF) {
IdentifierInfo *II = &CGM.getContext().Idents.get("NSAutoreleasePool");
- return EmitClassRefFromId(CGF, II);
+ return EmitClassRefFromId(CGF, II, false);
}
llvm::Value *
@@ -6704,19 +6682,20 @@
if (!Entry) {
std::string ClassName(getClassSymbolPrefix() + ID->getNameAsString());
- llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName);
+ llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName,
+ ID->isWeakImported());
Entry =
new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy,
- false, llvm::GlobalValue::InternalLinkage,
+ false, llvm::GlobalValue::PrivateLinkage,
ClassGV,
"\01L_OBJC_CLASSLIST_SUP_REFS_$_");
Entry->setAlignment(
CGM.getDataLayout().getABITypeAlignment(
ObjCTypes.ClassnfABIPtrTy));
Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip");
- CGM.AddUsedGlobal(Entry);
+ CGM.addCompilerUsedGlobal(Entry);
}
-
+ assertPrivateName(Entry);
return CGF.Builder.CreateLoad(Entry);
}
@@ -6726,23 +6705,23 @@
llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF,
const ObjCInterfaceDecl *ID) {
llvm::GlobalVariable * &Entry = MetaClassReferences[ID->getIdentifier()];
- if (Entry)
- return CGF.Builder.CreateLoad(Entry);
+ if (!Entry) {
- std::string MetaClassName(getMetaclassSymbolPrefix() + ID->getNameAsString());
- llvm::GlobalVariable *MetaClassGV = GetClassGlobal(MetaClassName);
- Entry =
- new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false,
- llvm::GlobalValue::InternalLinkage,
- MetaClassGV,
- "\01L_OBJC_CLASSLIST_SUP_REFS_$_");
- Entry->setAlignment(
- CGM.getDataLayout().getABITypeAlignment(
- ObjCTypes.ClassnfABIPtrTy));
+ std::string MetaClassName(getMetaclassSymbolPrefix() +
+ ID->getNameAsString());
+ llvm::GlobalVariable *MetaClassGV = GetClassGlobal(MetaClassName);
+ Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy,
+ false, llvm::GlobalValue::PrivateLinkage,
+ MetaClassGV,
+ "\01L_OBJC_CLASSLIST_SUP_REFS_$_");
+ Entry->setAlignment(
+ CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABIPtrTy));
- Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip");
- CGM.AddUsedGlobal(Entry);
+ Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip");
+ CGM.addCompilerUsedGlobal(Entry);
+ }
+ assertPrivateName(Entry);
return CGF.Builder.CreateLoad(Entry);
}
@@ -6752,8 +6731,9 @@
const ObjCInterfaceDecl *ID) {
if (ID->isWeakImported()) {
std::string ClassName(getClassSymbolPrefix() + ID->getNameAsString());
- llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName);
- ClassGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
+ llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName, true);
+ (void)ClassGV;
+ assert(ClassGV->getLinkage() == llvm::GlobalValue::ExternalWeakLinkage);
}
return EmitClassRef(CGF, ID);
@@ -6819,12 +6799,13 @@
ObjCTypes.SelectorPtrTy);
Entry =
new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.SelectorPtrTy, false,
- llvm::GlobalValue::InternalLinkage,
+ llvm::GlobalValue::PrivateLinkage,
Casted, "\01L_OBJC_SELECTOR_REFERENCES_");
Entry->setExternallyInitialized(true);
Entry->setSection("__DATA, __objc_selrefs, literal_pointers, no_dead_strip");
- CGM.AddUsedGlobal(Entry);
+ CGM.addCompilerUsedGlobal(Entry);
}
+ assertPrivateName(Entry);
if (lval)
return Entry;
@@ -7054,27 +7035,29 @@
llvm::Constant *Init =
llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values);
+ llvm::GlobalValue::LinkageTypes L = ForDefinition
+ ? llvm::GlobalValue::ExternalLinkage
+ : llvm::GlobalValue::WeakAnyLinkage;
if (Entry) {
Entry->setInitializer(Init);
} else {
Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false,
- llvm::GlobalValue::WeakAnyLinkage,
+ L,
Init,
("OBJC_EHTYPE_$_" +
ID->getIdentifier()->getName()));
}
+ assert(Entry->getLinkage() == L);
if (ID->getVisibility() == HiddenVisibility)
Entry->setVisibility(llvm::GlobalValue::HiddenVisibility);
Entry->setAlignment(CGM.getDataLayout().getABITypeAlignment(
ObjCTypes.EHTypeTy));
- if (ForDefinition) {
+ if (ForDefinition)
Entry->setSection("__DATA,__objc_const");
- Entry->setLinkage(llvm::GlobalValue::ExternalLinkage);
- } else {
+ else
Entry->setSection("__DATA,__datacoal_nt,coalesced");
- }
return Entry;
}
diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp
index d097b6f..8d6c653 100644
--- a/lib/CodeGen/CGObjCRuntime.cpp
+++ b/lib/CodeGen/CGObjCRuntime.cpp
@@ -21,7 +21,7 @@
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtObjC.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;
@@ -158,7 +158,7 @@
bool MightThrow;
llvm::Value *Fn;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
if (!MightThrow) {
CGF.Builder.CreateCall(Fn)->setDoesNotThrow();
return;
@@ -303,7 +303,7 @@
CallSyncExit(llvm::Value *SyncExitFn, llvm::Value *SyncArg)
: SyncExitFn(SyncExitFn), SyncArg(SyncArg) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.Builder.CreateCall(SyncExitFn, SyncArg)->setDoesNotThrow();
}
};
diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h
index 7f030f2..2a8ae90 100644
--- a/lib/CodeGen/CGObjCRuntime.h
+++ b/lib/CodeGen/CGObjCRuntime.h
@@ -268,7 +268,8 @@
const CodeGen::CGBlockInfo &blockInfo) = 0;
virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM,
QualType T) = 0;
- virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name) = 0;
+ virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name,
+ bool Weak = false) = 0;
struct MessageSendInfo {
const CGFunctionInfo &CallInfo;
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
index 7c454ac..6e1a3c9 100644
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -33,32 +33,35 @@
assert(T->isOpenCLSpecificType() &&
"Not an OpenCL specific type!");
+ llvm::LLVMContext& Ctx = CGM.getLLVMContext();
+ uint32_t ImgAddrSpc =
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
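+  // Image types are lowered to opaque struct pointers in the OpenCL global
+  // address space; samplers become i32 and events stay in address space 0.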
switch (cast<BuiltinType>(T)->getKind()) {
default:
llvm_unreachable("Unexpected opencl builtin type!");
return 0;
case BuiltinType::OCLImage1d:
return llvm::PointerType::get(llvm::StructType::create(
- CGM.getLLVMContext(), "opencl.image1d_t"), 0);
+ Ctx, "opencl.image1d_t"), ImgAddrSpc);
case BuiltinType::OCLImage1dArray:
return llvm::PointerType::get(llvm::StructType::create(
- CGM.getLLVMContext(), "opencl.image1d_array_t"), 0);
+ Ctx, "opencl.image1d_array_t"), ImgAddrSpc);
case BuiltinType::OCLImage1dBuffer:
return llvm::PointerType::get(llvm::StructType::create(
- CGM.getLLVMContext(), "opencl.image1d_buffer_t"), 0);
+ Ctx, "opencl.image1d_buffer_t"), ImgAddrSpc);
case BuiltinType::OCLImage2d:
return llvm::PointerType::get(llvm::StructType::create(
- CGM.getLLVMContext(), "opencl.image2d_t"), 0);
+ Ctx, "opencl.image2d_t"), ImgAddrSpc);
case BuiltinType::OCLImage2dArray:
return llvm::PointerType::get(llvm::StructType::create(
- CGM.getLLVMContext(), "opencl.image2d_array_t"), 0);
+ Ctx, "opencl.image2d_array_t"), ImgAddrSpc);
case BuiltinType::OCLImage3d:
return llvm::PointerType::get(llvm::StructType::create(
- CGM.getLLVMContext(), "opencl.image3d_t"), 0);
+ Ctx, "opencl.image3d_t"), ImgAddrSpc);
case BuiltinType::OCLSampler:
- return llvm::IntegerType::get(CGM.getLLVMContext(),32);
+ return llvm::IntegerType::get(Ctx, 32);
case BuiltinType::OCLEvent:
return llvm::PointerType::get(llvm::StructType::create(
- CGM.getLLVMContext(), "opencl.event_t"), 0);
+ Ctx, "opencl.event_t"), 0);
}
}
diff --git a/lib/CodeGen/CGRTTI.cpp b/lib/CodeGen/CGRTTI.cpp
index aa687b9..7049df7 100644
--- a/lib/CodeGen/CGRTTI.cpp
+++ b/lib/CodeGen/CGRTTI.cpp
@@ -332,11 +332,11 @@
switch (Ty->getLinkage()) {
case NoLinkage:
- case VisibleNoLinkage:
case InternalLinkage:
case UniqueExternalLinkage:
return llvm::GlobalValue::InternalLinkage;
+ case VisibleNoLinkage:
case ExternalLinkage:
if (!CGM.getLangOpts().RTTI) {
// RTTI is not enabled, which means that this type info struct is going
@@ -544,8 +544,25 @@
// And the name.
llvm::GlobalVariable *TypeName = GetAddrOfTypeName(Ty, Linkage);
+ llvm::Constant *TypeNameField;
- Fields.push_back(llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy));
+ // If we're supposed to demote the visibility, be sure to set a flag
+ // to use a string comparison for type_info comparisons.
+ CGCXXABI::RTTIUniquenessKind RTTIUniqueness =
+ CGM.getCXXABI().classifyRTTIUniqueness(Ty, Linkage);
+ if (RTTIUniqueness != CGCXXABI::RUK_Unique) {
+ // The flag is the sign bit, which on ARM64 is defined to be clear
+ // for global pointers. This is very ARM64-specific.
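+    // Seeing the bit set, the runtime knows the type_info may not be unique
+    // and falls back to comparing type name strings.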
+ TypeNameField = llvm::ConstantExpr::getPtrToInt(TypeName, CGM.Int64Ty);
+ llvm::Constant *flag =
+ llvm::ConstantInt::get(CGM.Int64Ty, ((uint64_t)1) << 63);
+ TypeNameField = llvm::ConstantExpr::getAdd(TypeNameField, flag);
+ TypeNameField =
+ llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.Int8PtrTy);
+ } else {
+ TypeNameField = llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy);
+ }
+ Fields.push_back(TypeNameField);
switch (Ty->getTypeClass()) {
#define TYPE(Class, Base)
@@ -644,32 +661,35 @@
OldGV->eraseFromParent();
}
- // GCC only relies on the uniqueness of the type names, not the
- // type_infos themselves, so we can emit these as hidden symbols.
- // But don't do this if we're worried about strict visibility
- // compatibility.
- if (const RecordType *RT = dyn_cast<RecordType>(Ty)) {
- const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
+ // The Itanium ABI specifies that type_info objects must be globally
+ // unique, with one exception: if the type is an incomplete class
+ // type or a (possibly indirect) pointer to one. That exception
+ // affects the general case of comparing type_info objects produced
+ // by the typeid operator, which is why the comparison operators on
+ // std::type_info generally use the type_info name pointers instead
+ // of the object addresses. However, the language's built-in uses
+ // of RTTI generally require class types to be complete, even when
+ // manipulating pointers to those class types. This allows the
+ // implementation of dynamic_cast to rely on address equality tests,
+ // which is much faster.
- CGM.setTypeVisibility(GV, RD, CodeGenModule::TVK_ForRTTI);
- CGM.setTypeVisibility(TypeName, RD, CodeGenModule::TVK_ForRTTIName);
- } else {
- Visibility TypeInfoVisibility = DefaultVisibility;
- if (CGM.getCodeGenOpts().HiddenWeakVTables &&
- Linkage == llvm::GlobalValue::LinkOnceODRLinkage)
- TypeInfoVisibility = HiddenVisibility;
+ // All of this is to say that it's important that both the type_info
+ // object and the type_info name be uniqued when weakly emitted.
- // The type name should have the same visibility as the type itself.
- Visibility ExplicitVisibility = Ty->getVisibility();
- TypeName->setVisibility(CodeGenModule::
- GetLLVMVisibility(ExplicitVisibility));
-
- TypeInfoVisibility = minVisibility(TypeInfoVisibility, Ty->getVisibility());
- GV->setVisibility(CodeGenModule::GetLLVMVisibility(TypeInfoVisibility));
+ // Give the type_info object and name the formal visibility of the
+ // type itself.
+ Visibility formalVisibility = Ty->getVisibility();
+ llvm::GlobalValue::VisibilityTypes llvmVisibility =
+ CodeGenModule::GetLLVMVisibility(formalVisibility);
+ TypeName->setVisibility(llvmVisibility);
+ GV->setVisibility(llvmVisibility);
+
+ // FIXME: integrate this better into the above when we move to trunk
+ if (RTTIUniqueness == CGCXXABI::RUK_NonUniqueHidden) {
+ TypeName->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
}
- GV->setUnnamedAddr(true);
-
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
}
@@ -766,9 +786,8 @@
}
// Walk all bases.
- for (CXXRecordDecl::base_class_const_iterator I = BaseDecl->bases_begin(),
- E = BaseDecl->bases_end(); I != E; ++I)
- Flags |= ComputeVMIClassTypeInfoFlags(I, Bases);
+ for (const auto &I : BaseDecl->bases())
+ Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases);
return Flags;
}
@@ -778,9 +797,8 @@
SeenBases Bases;
// Walk all bases.
- for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(),
- E = RD->bases_end(); I != E; ++I)
- Flags |= ComputeVMIClassTypeInfoFlags(I, Bases);
+ for (const auto &I : RD->bases())
+ Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases);
return Flags;
}
@@ -827,15 +845,12 @@
// __offset_shift = 8
// };
// };
- for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(),
- E = RD->bases_end(); I != E; ++I) {
- const CXXBaseSpecifier *Base = I;
-
+ for (const auto &Base : RD->bases()) {
// The __base_type member points to the RTTI for the base type.
- Fields.push_back(RTTIBuilder(CGM).BuildTypeInfo(Base->getType()));
+ Fields.push_back(RTTIBuilder(CGM).BuildTypeInfo(Base.getType()));
const CXXRecordDecl *BaseDecl =
- cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl());
int64_t OffsetFlags = 0;
@@ -844,7 +859,7 @@
// subobject. For a virtual base, this is the offset in the virtual table of
// the virtual base offset for the virtual base referenced (negative).
CharUnits Offset;
- if (Base->isVirtual())
+ if (Base.isVirtual())
Offset =
CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset(RD, BaseDecl);
else {
@@ -856,9 +871,9 @@
// The low-order byte of __offset_flags contains flags, as given by the
// masks from the enumeration __offset_flags_masks.
- if (Base->isVirtual())
+ if (Base.isVirtual())
OffsetFlags |= BCTI_Virtual;
- if (Base->getAccessSpecifier() == AS_public)
+ if (Base.getAccessSpecifier() == AS_public)
OffsetFlags |= BCTI_Public;
Fields.push_back(llvm::ConstantInt::get(LongLTy, OffsetFlags));
diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h
index b29fc98..0fc7b8a 100644
--- a/lib/CodeGen/CGRecordLayout.h
+++ b/lib/CodeGen/CGRecordLayout.h
@@ -130,7 +130,7 @@
llvm::DenseMap<const FieldDecl *, CGBitFieldInfo> BitFields;
// FIXME: Maybe we could use a CXXBaseSpecifier as the key and use a single
- // map for both virtual and non virtual bases.
+ // map for both virtual and non-virtual bases.
llvm::DenseMap<const CXXRecordDecl *, unsigned> NonVirtualBases;
/// Map from virtual bases to their field index in the complete object.
@@ -201,7 +201,7 @@
/// \brief Return the BitFieldInfo that corresponds to the field FD.
const CGBitFieldInfo &getBitFieldInfo(const FieldDecl *FD) const {
- assert(FD->isBitField() && "Invalid call for non bit-field decl!");
+ assert(FD->isBitField() && "Invalid call for non-bit-field decl!");
llvm::DenseMap<const FieldDecl *, CGBitFieldInfo>::const_iterator
it = BitFields.find(FD);
assert(it != BitFields.end() && "Unable to find bitfield info");
diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index ab92563..75b4504 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -25,205 +25,560 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace CodeGen;
namespace {
-
-class CGRecordLayoutBuilder {
-public:
- /// FieldTypes - Holds the LLVM types that the struct is created from.
- ///
+/// The CGRecordLowering is responsible for lowering an ASTRecordLayout to an
+/// llvm::Type. Some of the lowering is straightforward, some is not. Here we
+/// detail some of the complexities and weirdnesses.
+/// * LLVM does not have unions - Unions can, in theory, be represented by any
+/// llvm::Type with correct size. We choose a field via a specific heuristic
+/// and add padding if necessary.
+/// * LLVM does not have bitfields - Bitfields are collected into contiguous
+/// runs and allocated as a single storage type for the run. ASTRecordLayout
+/// contains enough information to determine where the runs break. Microsoft
+/// and Itanium follow different rules and use different codepaths.
+/// * It is desired that, when possible, bitfields use the appropriate iN type
+/// when lowered to llvm types. For example unsigned x : 24 gets lowered to
+/// i24. This isn't always possible because i24 has a storage size of 32 bits
+/// and if it is possible to use that extra byte of padding we must use
+/// [3 x i8] instead of i24. The function clipTailPadding does this.
+/// C++ examples that require clipping:
+/// struct { int a : 24; char b; }; // a must be clipped, b goes at offset 3
+/// struct A { int a : 24; }; // a must be clipped because a struct like B
+/// could exist: struct B : A { char b; }; // b goes at offset 3
+/// * Clang ignores 0 sized bitfields and 0 sized bases but *not* zero sized
+/// fields. The existing asserts suggest that LLVM assumes that *every* field
+/// has an underlying storage type. Therefore empty structures containing
+/// zero sized subobjects such as empty records or zero sized arrays still get
+/// a zero sized (empty struct) storage type.
+/// * Clang reads the complete type rather than the base type when generating
+/// code to access fields. Bitfields in tail position with tail padding may
+/// be clipped in the base class but not the complete class (we may discover
+/// that the tail padding is not used in the complete class.) However,
+/// because LLVM reads from the complete type it can generate incorrect code
+/// if we do not clip the tail padding off of the bitfield in the complete
+/// layout. This introduces a somewhat awkward extra unnecessary clip stage.
+/// The location of the clip is stored internally as a sentinel of type
+/// SCISSOR. If LLVM were updated to read base types (which it probably
+/// should because locations of things such as VBases are bogus in the llvm
+/// type anyway) then we could eliminate the SCISSOR.
+/// * Itanium allows nearly empty primary virtual bases. These bases don't get
+/// their own storage because they're laid out as part of another base
+/// or at the beginning of the structure. Determining if a VBase actually
+/// gets storage awkwardly involves a walk of all bases.
+/// * VFPtrs and VBPtrs do *not* make a record NotZeroInitializable.
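+///
+/// For illustration, struct { int a : 24; char b; } from the clipping example
+/// above is expected to lower to roughly { [3 x i8], i8 }, since 'b' occupies
+/// the byte that would otherwise be the tail padding of the i24 storage for
+/// 'a'.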
+struct CGRecordLowering {
+ // MemberInfo is a helper structure that contains information about a record
+  // member. In addition to the standard member types, there exists a
+  // sentinel member type that ensures correct rounding.
+ struct MemberInfo {
+ CharUnits Offset;
+ enum InfoKind { VFPtr, VBPtr, Field, Base, VBase, Scissor } Kind;
+ llvm::Type *Data;
+ union {
+ const FieldDecl *FD;
+ const CXXRecordDecl *RD;
+ };
+ MemberInfo(CharUnits Offset, InfoKind Kind, llvm::Type *Data,
+ const FieldDecl *FD = 0)
+ : Offset(Offset), Kind(Kind), Data(Data), FD(FD) {}
+ MemberInfo(CharUnits Offset, InfoKind Kind, llvm::Type *Data,
+ const CXXRecordDecl *RD)
+ : Offset(Offset), Kind(Kind), Data(Data), RD(RD) {}
+ // MemberInfos are sorted so we define a < operator.
+ bool operator <(const MemberInfo& a) const { return Offset < a.Offset; }
+ };
+ // The constructor.
+ CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D);
+ // Short helper routines.
+ /// \brief Constructs a MemberInfo instance from an offset and llvm::Type *.
+ MemberInfo StorageInfo(CharUnits Offset, llvm::Type *Data) {
+ return MemberInfo(Offset, MemberInfo::Field, Data);
+ }
+ bool useMSABI() {
+ return Context.getTargetInfo().getCXXABI().isMicrosoft() ||
+ D->isMsStruct(Context);
+ }
+ /// \brief Wraps llvm::Type::getIntNTy with some implicit arguments.
+ llvm::Type *getIntNType(uint64_t NumBits) {
+ return llvm::Type::getIntNTy(Types.getLLVMContext(),
+ (unsigned)llvm::RoundUpToAlignment(NumBits, 8));
+ }
+ /// \brief Gets an llvm type of size NumBytes and alignment 1.
+ llvm::Type *getByteArrayType(CharUnits NumBytes) {
+ assert(!NumBytes.isZero() && "Empty byte arrays aren't allowed.");
+ llvm::Type *Type = llvm::Type::getInt8Ty(Types.getLLVMContext());
+ return NumBytes == CharUnits::One() ? Type :
+ (llvm::Type *)llvm::ArrayType::get(Type, NumBytes.getQuantity());
+ }
+ /// \brief Gets the storage type for a field decl and handles storage
+  /// for Itanium bitfields that are smaller than their declared type.
+ llvm::Type *getStorageType(const FieldDecl *FD) {
+ llvm::Type *Type = Types.ConvertTypeForMem(FD->getType());
+ return useMSABI() || !FD->isBitField() ? Type :
+ getIntNType(std::min(FD->getBitWidthValue(Context),
+ (unsigned)Context.toBits(getSize(Type))));
+ }
+ /// \brief Gets the llvm Basesubobject type from a CXXRecordDecl.
+ llvm::Type *getStorageType(const CXXRecordDecl *RD) {
+ return Types.getCGRecordLayout(RD).getBaseSubobjectLLVMType();
+ }
+ CharUnits bitsToCharUnits(uint64_t BitOffset) {
+ return Context.toCharUnitsFromBits(BitOffset);
+ }
+ CharUnits getSize(llvm::Type *Type) {
+ return CharUnits::fromQuantity(DataLayout.getTypeAllocSize(Type));
+ }
+ CharUnits getAlignment(llvm::Type *Type) {
+ return CharUnits::fromQuantity(DataLayout.getABITypeAlignment(Type));
+ }
+ bool isZeroInitializable(const FieldDecl *FD) {
+ const Type *Type = FD->getType()->getBaseElementTypeUnsafe();
+ if (const MemberPointerType *MPT = Type->getAs<MemberPointerType>())
+ return Types.getCXXABI().isZeroInitializable(MPT);
+ if (const RecordType *RT = Type->getAs<RecordType>())
+ return isZeroInitializable(RT->getDecl());
+ return true;
+ }
+ bool isZeroInitializable(const RecordDecl *RD) {
+ return Types.getCGRecordLayout(RD).isZeroInitializable();
+ }
+ void appendPaddingBytes(CharUnits Size) {
+ if (!Size.isZero())
+ FieldTypes.push_back(getByteArrayType(Size));
+ }
+ uint64_t getFieldBitOffset(const FieldDecl *FD) {
+ return Layout.getFieldOffset(FD->getFieldIndex());
+ }
+ // Layout routines.
+ void setBitFieldInfo(const FieldDecl *FD, CharUnits StartOffset,
+ llvm::Type *StorageType);
+ /// \brief Lowers an ASTRecordLayout to a llvm type.
+ void lower(bool NonVirtualBaseType);
+ void lowerUnion();
+ void accumulateFields();
+ void accumulateBitFields(RecordDecl::field_iterator Field,
+ RecordDecl::field_iterator FieldEnd);
+ void accumulateBases();
+ void accumulateVPtrs();
+ void accumulateVBases();
+ /// \brief Recursively searches all of the bases to find out if a vbase is
+ /// not the primary vbase of some base class.
+ bool hasOwnStorage(const CXXRecordDecl *Decl, const CXXRecordDecl *Query);
+ void calculateZeroInit();
+ /// \brief Lowers bitfield storage types to I8 arrays for bitfields with tail
+ /// padding that is or can potentially be used.
+ void clipTailPadding();
+ /// \brief Determines if we need a packed llvm struct.
+ void determinePacked();
+  /// \brief Inserts padding everywhere it's needed.
+ void insertPadding();
+ /// \brief Fills out the structures that are ultimately consumed.
+ void fillOutputFields();
+ // Input memoization fields.
+ CodeGenTypes &Types;
+ const ASTContext &Context;
+ const RecordDecl *D;
+ const CXXRecordDecl *RD;
+ const ASTRecordLayout &Layout;
+ const llvm::DataLayout &DataLayout;
+ // Helpful intermediate data-structures.
+ std::vector<MemberInfo> Members;
+ // Output fields, consumed by CodeGenTypes::ComputeRecordLayout.
SmallVector<llvm::Type *, 16> FieldTypes;
-
- /// BaseSubobjectType - Holds the LLVM type for the non-virtual part
- /// of the struct. For example, consider:
- ///
- /// struct A { int i; };
- /// struct B { void *v; };
- /// struct C : virtual A, B { };
- ///
- /// The LLVM type of C will be
- /// %struct.C = type { i32 (...)**, %struct.A, i32, %struct.B }
- ///
- /// And the LLVM type of the non-virtual base struct will be
- /// %struct.C.base = type { i32 (...)**, %struct.A, i32 }
- ///
- /// This only gets initialized if the base subobject type is
- /// different from the complete-object type.
- llvm::StructType *BaseSubobjectType;
-
- /// FieldInfo - Holds a field and its corresponding LLVM field number.
llvm::DenseMap<const FieldDecl *, unsigned> Fields;
-
- /// BitFieldInfo - Holds location and size information about a bit field.
llvm::DenseMap<const FieldDecl *, CGBitFieldInfo> BitFields;
-
llvm::DenseMap<const CXXRecordDecl *, unsigned> NonVirtualBases;
llvm::DenseMap<const CXXRecordDecl *, unsigned> VirtualBases;
-
- /// IndirectPrimaryBases - Virtual base classes, direct or indirect, that are
- /// primary base classes for some other direct or indirect base class.
- CXXIndirectPrimaryBaseSet IndirectPrimaryBases;
-
- /// LaidOutVirtualBases - A set of all laid out virtual bases, used to avoid
- /// avoid laying out virtual bases more than once.
- llvm::SmallPtrSet<const CXXRecordDecl *, 4> LaidOutVirtualBases;
-
- /// IsZeroInitializable - Whether this struct can be C++
- /// zero-initialized with an LLVM zeroinitializer.
- bool IsZeroInitializable;
- bool IsZeroInitializableAsBase;
-
- /// Packed - Whether the resulting LLVM struct will be packed or not.
- bool Packed;
-
+ bool IsZeroInitializable : 1;
+ bool IsZeroInitializableAsBase : 1;
+ bool Packed : 1;
private:
- CodeGenTypes &Types;
-
- /// LastLaidOutBaseInfo - Contains the offset and non-virtual size of the
- /// last base laid out. Used so that we can replace the last laid out base
- /// type with an i8 array if needed.
- struct LastLaidOutBaseInfo {
- CharUnits Offset;
- CharUnits NonVirtualSize;
-
- bool isValid() const { return !NonVirtualSize.isZero(); }
- void invalidate() { NonVirtualSize = CharUnits::Zero(); }
-
- } LastLaidOutBase;
-
- /// Alignment - Contains the alignment of the RecordDecl.
- CharUnits Alignment;
-
- /// NextFieldOffset - Holds the next field offset.
- CharUnits NextFieldOffset;
-
- /// LayoutUnionField - Will layout a field in an union and return the type
- /// that the field will have.
- llvm::Type *LayoutUnionField(const FieldDecl *Field,
- const ASTRecordLayout &Layout);
-
- /// LayoutUnion - Will layout a union RecordDecl.
- void LayoutUnion(const RecordDecl *D);
-
- /// Lay out a sequence of contiguous bitfields.
- bool LayoutBitfields(const ASTRecordLayout &Layout,
- unsigned &FirstFieldNo,
- RecordDecl::field_iterator &FI,
- RecordDecl::field_iterator FE);
-
- /// LayoutFields - try to layout all fields in the record decl.
- /// Returns false if the operation failed because the struct is not packed.
- bool LayoutFields(const RecordDecl *D);
-
- /// Layout a single base, virtual or non-virtual
- bool LayoutBase(const CXXRecordDecl *base,
- const CGRecordLayout &baseLayout,
- CharUnits baseOffset);
-
- /// LayoutVirtualBase - layout a single virtual base.
- bool LayoutVirtualBase(const CXXRecordDecl *base,
- CharUnits baseOffset);
-
- /// LayoutVirtualBases - layout the virtual bases of a record decl.
- bool LayoutVirtualBases(const CXXRecordDecl *RD,
- const ASTRecordLayout &Layout);
-
- /// MSLayoutVirtualBases - layout the virtual bases of a record decl,
- /// like MSVC.
- bool MSLayoutVirtualBases(const CXXRecordDecl *RD,
- const ASTRecordLayout &Layout);
-
- /// LayoutNonVirtualBase - layout a single non-virtual base.
- bool LayoutNonVirtualBase(const CXXRecordDecl *base,
- CharUnits baseOffset);
-
- /// LayoutNonVirtualBases - layout the virtual bases of a record decl.
- bool LayoutNonVirtualBases(const CXXRecordDecl *RD,
- const ASTRecordLayout &Layout);
-
- /// ComputeNonVirtualBaseType - Compute the non-virtual base field types.
- bool ComputeNonVirtualBaseType(const CXXRecordDecl *RD);
-
- /// LayoutField - layout a single field. Returns false if the operation failed
- /// because the current struct is not packed.
- bool LayoutField(const FieldDecl *D, uint64_t FieldOffset);
-
- /// LayoutBitField - layout a single bit field.
- void LayoutBitField(const FieldDecl *D, uint64_t FieldOffset);
-
- /// AppendField - Appends a field with the given offset and type.
- void AppendField(CharUnits fieldOffset, llvm::Type *FieldTy);
-
- /// AppendPadding - Appends enough padding bytes so that the total
- /// struct size is a multiple of the field alignment.
- void AppendPadding(CharUnits fieldOffset, CharUnits fieldAlignment);
-
- /// ResizeLastBaseFieldIfNecessary - Fields and bases can be laid out in the
- /// tail padding of a previous base. If this happens, the type of the previous
- /// base needs to be changed to an array of i8. Returns true if the last
- /// laid out base was resized.
- bool ResizeLastBaseFieldIfNecessary(CharUnits offset);
-
- /// getByteArrayType - Returns a byte array type with the given number of
- /// elements.
- llvm::Type *getByteArrayType(CharUnits NumBytes);
-
- /// AppendBytes - Append a given number of bytes to the record.
- void AppendBytes(CharUnits numBytes);
-
- /// AppendTailPadding - Append enough tail padding so that the type will have
- /// the passed size.
- void AppendTailPadding(CharUnits RecordSize);
-
- CharUnits getTypeAlignment(llvm::Type *Ty) const;
-
- /// getAlignmentAsLLVMStruct - Returns the maximum alignment of all the
- /// LLVM element types.
- CharUnits getAlignmentAsLLVMStruct() const;
-
- /// CheckZeroInitializable - Check if the given type contains a pointer
- /// to data member.
- void CheckZeroInitializable(QualType T);
-
-public:
- CGRecordLayoutBuilder(CodeGenTypes &Types)
- : BaseSubobjectType(0),
- IsZeroInitializable(true), IsZeroInitializableAsBase(true),
- Packed(false), Types(Types) { }
-
- /// Layout - Will layout a RecordDecl.
- void Layout(const RecordDecl *D);
+ CGRecordLowering(const CGRecordLowering &) LLVM_DELETED_FUNCTION;
+ void operator =(const CGRecordLowering &) LLVM_DELETED_FUNCTION;
};
+} // namespace {
+CGRecordLowering::CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D)
+ : Types(Types), Context(Types.getContext()), D(D),
+ RD(dyn_cast<CXXRecordDecl>(D)),
+ Layout(Types.getContext().getASTRecordLayout(D)),
+ DataLayout(Types.getDataLayout()), IsZeroInitializable(true),
+ IsZeroInitializableAsBase(true), Packed(false) {}
+
+void CGRecordLowering::setBitFieldInfo(
+ const FieldDecl *FD, CharUnits StartOffset, llvm::Type *StorageType) {
+ CGBitFieldInfo &Info = BitFields[FD];
+ Info.IsSigned = FD->getType()->isSignedIntegerOrEnumerationType();
+ Info.Offset = (unsigned)(getFieldBitOffset(FD) - Context.toBits(StartOffset));
+ Info.Size = FD->getBitWidthValue(Context);
+ Info.StorageSize = (unsigned)DataLayout.getTypeAllocSizeInBits(StorageType);
+  // Here we calculate the actual storage alignment of the bits. E.g., if we've
+ // got an alignment >= 2 and the bitfield starts at offset 6 we've got an
+ // alignment of 2.
+ Info.StorageAlignment =
+ Layout.getAlignment().alignmentAtOffset(StartOffset).getQuantity();
+ if (Info.Size > Info.StorageSize)
+ Info.Size = Info.StorageSize;
+ // Reverse the bit offsets for big endian machines. Because we represent
+ // a bitfield as a single large integer load, we can imagine the bits
+ // counting from the most-significant-bit instead of the
+ // least-significant-bit.
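+  // For example, with a 32-bit storage unit, a bitfield of Size 8 at Offset 0
+  // is reported at Offset 24 (32 - (0 + 8)) on a big endian target.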
+ if (DataLayout.isBigEndian())
+ Info.Offset = Info.StorageSize - (Info.Offset + Info.Size);
}
-void CGRecordLayoutBuilder::Layout(const RecordDecl *D) {
- const ASTRecordLayout &Layout = Types.getContext().getASTRecordLayout(D);
- Alignment = Layout.getAlignment();
- Packed = D->hasAttr<PackedAttr>() || Layout.getSize() % Alignment != 0;
+void CGRecordLowering::lower(bool NVBaseType) {
+ // The lowering process implemented in this function takes a variety of
+ // carefully ordered phases.
+ // 1) Store all members (fields and bases) in a list and sort them by offset.
+ // 2) Add a 1-byte capstone member at the Size of the structure.
+  //   3) Clip bitfield storage members if their tail padding is or might be
+ // used by another field or base. The clipping process uses the capstone
+ // by treating it as another object that occurs after the record.
+ // 4) Determine if the llvm-struct requires packing. It's important that this
+ // phase occur after clipping, because clipping changes the llvm type.
+ // This phase reads the offset of the capstone when determining packedness
+  //      and updates the alignment of the capstone to be equal to the alignment
+ // of the record after doing so.
+ // 5) Insert padding everywhere it is needed. This phase requires 'Packed' to
+ // have been computed and needs to know the alignment of the record in
+ // order to understand if explicit tail padding is needed.
+ // 6) Remove the capstone, we don't need it anymore.
+ // 7) Determine if this record can be zero-initialized. This phase could have
+ // been placed anywhere after phase 1.
+ // 8) Format the complete list of members in a way that can be consumed by
+ // CodeGenTypes::ComputeRecordLayout.
+ CharUnits Size = NVBaseType ? Layout.getNonVirtualSize() : Layout.getSize();
+ if (D->isUnion())
+ return lowerUnion();
+ accumulateFields();
+ // RD implies C++.
+ if (RD) {
+ accumulateVPtrs();
+ accumulateBases();
+ if (Members.empty())
+ return appendPaddingBytes(Size);
+ if (!NVBaseType)
+ accumulateVBases();
+ }
+ std::stable_sort(Members.begin(), Members.end());
+ Members.push_back(StorageInfo(Size, getIntNType(8)));
+ clipTailPadding();
+ determinePacked();
+ insertPadding();
+ Members.pop_back();
+ calculateZeroInit();
+ fillOutputFields();
+}
- if (D->isUnion()) {
- LayoutUnion(D);
+void CGRecordLowering::lowerUnion() {
+ CharUnits LayoutSize = Layout.getSize();
+ llvm::Type *StorageType = 0;
+ // Compute zero-initializable status.
+ if (!D->field_empty() && !isZeroInitializable(*D->field_begin()))
+ IsZeroInitializable = IsZeroInitializableAsBase = false;
+ // Iterate through the fields setting bitFieldInfo and the Fields array. Also
+ // locate the "most appropriate" storage type. The heuristic for finding the
+  // storage type isn't necessary; the first (non-0-length-bitfield) field's
+  // type would work fine and be simpler but would be different than what we've
+ // been doing and cause lit tests to change.
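+  // For example, in union { char c; int i; } the i32 storage of 'i' wins
+  // because it has the larger alignment.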
+ for (const auto *Field : D->fields()) {
+ if (Field->isBitField()) {
+ // Skip 0 sized bitfields.
+ if (Field->getBitWidthValue(Context) == 0)
+ continue;
+ llvm::Type *FieldType = getStorageType(Field);
+ if (LayoutSize < getSize(FieldType))
+ FieldType = getByteArrayType(LayoutSize);
+ setBitFieldInfo(Field, CharUnits::Zero(), FieldType);
+ }
+ Fields[Field] = 0;
+ llvm::Type *FieldType = getStorageType(Field);
+ // Conditionally update our storage type if we've got a new "better" one.
+ if (!StorageType ||
+ getAlignment(FieldType) > getAlignment(StorageType) ||
+ (getAlignment(FieldType) == getAlignment(StorageType) &&
+ getSize(FieldType) > getSize(StorageType)))
+ StorageType = FieldType;
+ }
+ // If we have no storage type just pad to the appropriate size and return.
+ if (!StorageType)
+ return appendPaddingBytes(LayoutSize);
+ // If our storage size was bigger than our required size (can happen in the
+ // case of packed bitfields on Itanium) then just use an I8 array.
+ if (LayoutSize < getSize(StorageType))
+ StorageType = getByteArrayType(LayoutSize);
+ FieldTypes.push_back(StorageType);
+ appendPaddingBytes(LayoutSize - getSize(StorageType));
+ // Set packed if we need it.
+ if (LayoutSize % getAlignment(StorageType))
+ Packed = true;
+}
+
+void CGRecordLowering::accumulateFields() {
+ for (RecordDecl::field_iterator Field = D->field_begin(),
+ FieldEnd = D->field_end();
+ Field != FieldEnd;)
+ if (Field->isBitField()) {
+ RecordDecl::field_iterator Start = Field;
+ // Iterate to gather the list of bitfields.
+ for (++Field; Field != FieldEnd && Field->isBitField(); ++Field);
+ accumulateBitFields(Start, Field);
+ } else {
+ Members.push_back(MemberInfo(
+ bitsToCharUnits(getFieldBitOffset(*Field)), MemberInfo::Field,
+ getStorageType(*Field), *Field));
+ ++Field;
+ }
+}
+
+void
+CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
+ RecordDecl::field_iterator FieldEnd) {
+ // Run stores the first element of the current run of bitfields. FieldEnd is
+ // used as a special value to note that we don't have a current run. A
+ // bitfield run is a contiguous collection of bitfields that can be stored in
+ // the same storage block. Zero-sized bitfields and bitfields that would
+ // cross an alignment boundary break a run and start a new one.
+ RecordDecl::field_iterator Run = FieldEnd;
+ // Tail is the offset of the first bit off the end of the current run. It's
+ // used to determine if the ASTRecordLayout is treating these two bitfields as
+  // contiguous. StartBitOffset is the offset of the beginning of the Run.
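+  // For example, given int a : 3, b : 5, : 0, d : 4; the fields a and b share
+  // one storage run, the zero-width bitfield ends that run, and d starts a
+  // new run with its own storage.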
+ uint64_t StartBitOffset, Tail = 0;
+ if (useMSABI()) {
+ for (; Field != FieldEnd; ++Field) {
+ uint64_t BitOffset = getFieldBitOffset(*Field);
+ // Zero-width bitfields end runs.
+ if (Field->getBitWidthValue(Context) == 0) {
+ Run = FieldEnd;
+ continue;
+ }
+ llvm::Type *Type = Types.ConvertTypeForMem(Field->getType());
+ // If we don't have a run yet, or don't live within the previous run's
+ // allocated storage then we allocate some storage and start a new run.
+ if (Run == FieldEnd || BitOffset >= Tail) {
+ Run = Field;
+ StartBitOffset = BitOffset;
+ Tail = StartBitOffset + DataLayout.getTypeAllocSizeInBits(Type);
+ // Add the storage member to the record. This must be added to the
+ // record before the bitfield members so that it gets laid out before
+ // the bitfields it contains get laid out.
+ Members.push_back(StorageInfo(bitsToCharUnits(StartBitOffset), Type));
+ }
+ // Bitfields get the offset of their storage but come afterward and remain
+ // there after a stable sort.
+ Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset),
+ MemberInfo::Field, 0, *Field));
+ }
return;
}
+ for (;;) {
+ // Check to see if we need to start a new run.
+ if (Run == FieldEnd) {
+ // If we're out of fields, return.
+ if (Field == FieldEnd)
+ break;
+ // Any non-zero-length bitfield can start a new run.
+ if (Field->getBitWidthValue(Context) != 0) {
+ Run = Field;
+ StartBitOffset = getFieldBitOffset(*Field);
+ Tail = StartBitOffset + Field->getBitWidthValue(Context);
+ }
+ ++Field;
+ continue;
+ }
+ // Add bitfields to the run as long as they qualify.
+ if (Field != FieldEnd && Field->getBitWidthValue(Context) != 0 &&
+ Tail == getFieldBitOffset(*Field)) {
+ Tail += Field->getBitWidthValue(Context);
+ ++Field;
+ continue;
+ }
+ // We've hit a break-point in the run and need to emit a storage field.
+ llvm::Type *Type = getIntNType(Tail - StartBitOffset);
+ // Add the storage member to the record and set the bitfield info for all of
+ // the bitfields in the run. Bitfields get the offset of their storage but
+ // come afterward and remain there after a stable sort.
+ Members.push_back(StorageInfo(bitsToCharUnits(StartBitOffset), Type));
+ for (; Run != Field; ++Run)
+ Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset),
+ MemberInfo::Field, 0, *Run));
+ Run = FieldEnd;
+ }
+}
- if (LayoutFields(D))
+void CGRecordLowering::accumulateBases() {
+ // If we've got a primary virtual base, we need to add it with the bases.
+ if (Layout.isPrimaryBaseVirtual())
+ Members.push_back(StorageInfo(
+ CharUnits::Zero(),
+ getStorageType(Layout.getPrimaryBase())));
+ // Accumulate the non-virtual bases.
+ for (const auto &Base : RD->bases()) {
+ if (Base.isVirtual())
+ continue;
+ const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
+ if (!BaseDecl->isEmpty())
+ Members.push_back(MemberInfo(Layout.getBaseClassOffset(BaseDecl),
+ MemberInfo::Base, getStorageType(BaseDecl), BaseDecl));
+ }
+}
+
+void CGRecordLowering::accumulateVPtrs() {
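+  // The vfptr slot is given the familiar i32 (...)** type, i.e. a pointer to
+  // an array of vararg function pointers.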
+ if (Layout.hasOwnVFPtr())
+ Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr,
+ llvm::FunctionType::get(getIntNType(32), /*isVarArg=*/true)->
+ getPointerTo()->getPointerTo()));
+ if (Layout.hasOwnVBPtr())
+ Members.push_back(MemberInfo(Layout.getVBPtrOffset(), MemberInfo::VBPtr,
+ llvm::Type::getInt32PtrTy(Types.getLLVMContext())));
+}
+
+void CGRecordLowering::accumulateVBases() {
+ Members.push_back(MemberInfo(Layout.getNonVirtualSize(),
+ MemberInfo::Scissor, 0, RD));
+ for (const auto &Base : RD->vbases()) {
+ const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
+ if (BaseDecl->isEmpty())
+ continue;
+ CharUnits Offset = Layout.getVBaseClassOffset(BaseDecl);
+ // If the vbase is a primary virtual base of some base, then it doesn't
+ // get its own storage location but instead lives inside of that base.
+ if (!useMSABI() && Context.isNearlyEmpty(BaseDecl) &&
+ !hasOwnStorage(RD, BaseDecl)) {
+ Members.push_back(MemberInfo(Offset, MemberInfo::VBase, 0, BaseDecl));
+ continue;
+ }
+ // If we've got a vtordisp, add it as a storage type.
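+    // (In the Microsoft ABI a vtordisp is a 4 byte displacement slot placed
+    // immediately before the virtual base.)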
+ if (Layout.getVBaseOffsetsMap().find(BaseDecl)->second.hasVtorDisp())
+ Members.push_back(StorageInfo(Offset - CharUnits::fromQuantity(4),
+ getIntNType(32)));
+ Members.push_back(MemberInfo(Offset, MemberInfo::VBase,
+ getStorageType(BaseDecl), BaseDecl));
+ }
+}
+
+bool CGRecordLowering::hasOwnStorage(const CXXRecordDecl *Decl,
+ const CXXRecordDecl *Query) {
+ const ASTRecordLayout &DeclLayout = Context.getASTRecordLayout(Decl);
+ if (DeclLayout.isPrimaryBaseVirtual() && DeclLayout.getPrimaryBase() == Query)
+ return false;
+ for (const auto &Base : Decl->bases())
+ if (!hasOwnStorage(Base.getType()->getAsCXXRecordDecl(), Query))
+ return false;
+ return true;
+}
+
+void CGRecordLowering::calculateZeroInit() {
+ for (std::vector<MemberInfo>::const_iterator Member = Members.begin(),
+ MemberEnd = Members.end();
+ IsZeroInitializableAsBase && Member != MemberEnd; ++Member) {
+ if (Member->Kind == MemberInfo::Field) {
+ if (!Member->FD || isZeroInitializable(Member->FD))
+ continue;
+ IsZeroInitializable = IsZeroInitializableAsBase = false;
+ } else if (Member->Kind == MemberInfo::Base ||
+ Member->Kind == MemberInfo::VBase) {
+ if (isZeroInitializable(Member->RD))
+ continue;
+ IsZeroInitializable = false;
+ if (Member->Kind == MemberInfo::Base)
+ IsZeroInitializableAsBase = false;
+ }
+ }
+}
+
+void CGRecordLowering::clipTailPadding() {
+ std::vector<MemberInfo>::iterator Prior = Members.begin();
+ CharUnits Tail = getSize(Prior->Data);
+ for (std::vector<MemberInfo>::iterator Member = Prior + 1,
+ MemberEnd = Members.end();
+ Member != MemberEnd; ++Member) {
+ // Only members with data and the scissor can cut into tail padding.
+ if (!Member->Data && Member->Kind != MemberInfo::Scissor)
+ continue;
+ if (Member->Offset < Tail) {
+ assert(Prior->Kind == MemberInfo::Field && !Prior->FD &&
+ "Only storage fields have tail padding!");
+ Prior->Data = getByteArrayType(bitsToCharUnits(llvm::RoundUpToAlignment(
+ cast<llvm::IntegerType>(Prior->Data)->getIntegerBitWidth(), 8)));
+ }
+ if (Member->Data)
+ Prior = Member;
+ Tail = Prior->Offset + getSize(Prior->Data);
+ }
+}
+
+void CGRecordLowering::determinePacked() {
+ CharUnits Alignment = CharUnits::One();
+ for (std::vector<MemberInfo>::const_iterator Member = Members.begin(),
+ MemberEnd = Members.end();
+ Member != MemberEnd; ++Member) {
+ if (!Member->Data)
+ continue;
+    // If any member falls at an offset that is not a multiple of its alignment,
+ // then the entire record must be packed.
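+    // For example, an i32 member placed at offset 2 (say, under
+    // #pragma pack(2)) forces the llvm struct to be packed.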
+ if (Member->Offset % getAlignment(Member->Data))
+ Packed = true;
+ Alignment = std::max(Alignment, getAlignment(Member->Data));
+ }
+ // If the size of the record (the capstone's offset) is not a multiple of the
+ // record's alignment, it must be packed.
+ if (Members.back().Offset % Alignment)
+ Packed = true;
+  // Update the alignment of the sentinel.
+ if (!Packed)
+ Members.back().Data = getIntNType(Context.toBits(Alignment));
+}
+
+void CGRecordLowering::insertPadding() {
+ std::vector<std::pair<CharUnits, CharUnits> > Padding;
+ CharUnits Size = CharUnits::Zero();
+ for (std::vector<MemberInfo>::const_iterator Member = Members.begin(),
+ MemberEnd = Members.end();
+ Member != MemberEnd; ++Member) {
+ if (!Member->Data)
+ continue;
+ CharUnits Offset = Member->Offset;
+ assert(Offset >= Size);
+ // Insert padding if we need to.
+ if (Offset != Size.RoundUpToAlignment(Packed ? CharUnits::One() :
+ getAlignment(Member->Data)))
+ Padding.push_back(std::make_pair(Size, Offset - Size));
+ Size = Offset + getSize(Member->Data);
+ }
+ if (Padding.empty())
return;
+ // Add the padding to the Members list and sort it.
+ for (std::vector<std::pair<CharUnits, CharUnits> >::const_iterator
+ Pad = Padding.begin(), PadEnd = Padding.end();
+ Pad != PadEnd; ++Pad)
+ Members.push_back(StorageInfo(Pad->first, getByteArrayType(Pad->second)));
+ std::stable_sort(Members.begin(), Members.end());
+}
- // We weren't able to layout the struct. Try again with a packed struct
- Packed = true;
- LastLaidOutBase.invalidate();
- NextFieldOffset = CharUnits::Zero();
- FieldTypes.clear();
- Fields.clear();
- BitFields.clear();
- NonVirtualBases.clear();
- VirtualBases.clear();
-
- LayoutFields(D);
+void CGRecordLowering::fillOutputFields() {
+ for (std::vector<MemberInfo>::const_iterator Member = Members.begin(),
+ MemberEnd = Members.end();
+ Member != MemberEnd; ++Member) {
+ if (Member->Data)
+ FieldTypes.push_back(Member->Data);
+ if (Member->Kind == MemberInfo::Field) {
+ if (Member->FD)
+ Fields[Member->FD] = FieldTypes.size() - 1;
+ // A field without storage must be a bitfield.
+ if (!Member->Data)
+ setBitFieldInfo(Member->FD, Member->Offset, FieldTypes.back());
+ } else if (Member->Kind == MemberInfo::Base)
+ NonVirtualBases[Member->RD] = FieldTypes.size() - 1;
+ else if (Member->Kind == MemberInfo::VBase)
+ VirtualBases[Member->RD] = FieldTypes.size() - 1;
+ }
}
CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types,
@@ -231,6 +586,9 @@
uint64_t Offset, uint64_t Size,
uint64_t StorageSize,
uint64_t StorageAlignment) {
+ // This function is vestigial from CGRecordLayoutBuilder days but is still
+ // used in GCObjCRuntime.cpp. That usage has a "fixme" attached to it that
+ // when addressed will allow for the removal of this function.
llvm::Type *Ty = Types.ConvertTypeForMem(FD->getType());
CharUnits TypeSizeInBytes =
CharUnits::fromQuantity(Types.getDataLayout().getTypeAllocSize(Ty));
@@ -262,709 +620,33 @@
return CGBitFieldInfo(Offset, Size, IsSigned, StorageSize, StorageAlignment);
}
-/// \brief Layout the range of bitfields from BFI to BFE as contiguous storage.
-bool CGRecordLayoutBuilder::LayoutBitfields(const ASTRecordLayout &Layout,
- unsigned &FirstFieldNo,
- RecordDecl::field_iterator &FI,
- RecordDecl::field_iterator FE) {
- assert(FI != FE);
- uint64_t FirstFieldOffset = Layout.getFieldOffset(FirstFieldNo);
- uint64_t NextFieldOffsetInBits = Types.getContext().toBits(NextFieldOffset);
-
- unsigned CharAlign = Types.getTarget().getCharAlign();
- assert(FirstFieldOffset % CharAlign == 0 &&
- "First field offset is misaligned");
- CharUnits FirstFieldOffsetInBytes
- = Types.getContext().toCharUnitsFromBits(FirstFieldOffset);
-
- unsigned StorageAlignment
- = llvm::MinAlign(Alignment.getQuantity(),
- FirstFieldOffsetInBytes.getQuantity());
-
- if (FirstFieldOffset < NextFieldOffsetInBits) {
- CharUnits FieldOffsetInCharUnits =
- Types.getContext().toCharUnitsFromBits(FirstFieldOffset);
-
- // Try to resize the last base field.
- if (!ResizeLastBaseFieldIfNecessary(FieldOffsetInCharUnits))
- llvm_unreachable("We must be able to resize the last base if we need to "
- "pack bits into it.");
-
- NextFieldOffsetInBits = Types.getContext().toBits(NextFieldOffset);
- assert(FirstFieldOffset >= NextFieldOffsetInBits);
- }
-
- // Append padding if necessary.
- AppendPadding(Types.getContext().toCharUnitsFromBits(FirstFieldOffset),
- CharUnits::One());
-
- // Find the last bitfield in a contiguous run of bitfields.
- RecordDecl::field_iterator BFI = FI;
- unsigned LastFieldNo = FirstFieldNo;
- uint64_t NextContiguousFieldOffset = FirstFieldOffset;
- for (RecordDecl::field_iterator FJ = FI;
- (FJ != FE && (*FJ)->isBitField() &&
- NextContiguousFieldOffset == Layout.getFieldOffset(LastFieldNo) &&
- (*FJ)->getBitWidthValue(Types.getContext()) != 0); FI = FJ++) {
- NextContiguousFieldOffset += (*FJ)->getBitWidthValue(Types.getContext());
- ++LastFieldNo;
-
- // We must use packed structs for packed fields, and also unnamed bit
- // fields since they don't affect the struct alignment.
- if (!Packed && ((*FJ)->hasAttr<PackedAttr>() || !(*FJ)->getDeclName()))
- return false;
- }
- RecordDecl::field_iterator BFE = llvm::next(FI);
- --LastFieldNo;
- assert(LastFieldNo >= FirstFieldNo && "Empty run of contiguous bitfields");
- FieldDecl *LastFD = *FI;
-
- // Find the last bitfield's offset, add its size, and round it up to the
- // character alignment to compute the storage required.
- uint64_t LastFieldOffset = Layout.getFieldOffset(LastFieldNo);
- uint64_t LastFieldSize = LastFD->getBitWidthValue(Types.getContext());
- uint64_t TotalBits = (LastFieldOffset + LastFieldSize) - FirstFieldOffset;
- CharUnits StorageBytes = Types.getContext().toCharUnitsFromBits(
- llvm::RoundUpToAlignment(TotalBits, CharAlign));
- uint64_t StorageBits = Types.getContext().toBits(StorageBytes);
-
- // Grow the storage to encompass any known padding in the layout when doing
- // so will make the storage a power-of-two. There are two cases when we can
- // do this. The first is when we have a subsequent field and can widen up to
- // its offset. The second is when the data size of the AST record layout is
- // past the end of the current storage. The latter is true when there is tail
- // padding on a struct and no members of a super class can be packed into it.
- //
- // Note that we widen the storage as much as possible here to express the
- // maximum latitude the language provides, and rely on the backend to lower
- // these in conjunction with shifts and masks to narrower operations where
- // beneficial.
- uint64_t EndOffset = Types.getContext().toBits(Layout.getDataSize());
- if (BFE != FE)
- // If there are more fields to be laid out, the offset at the end of the
- // bitfield is the offset of the next field in the record.
- EndOffset = Layout.getFieldOffset(LastFieldNo + 1);
- assert(EndOffset >= (FirstFieldOffset + TotalBits) &&
- "End offset is not past the end of the known storage bits.");
- uint64_t SpaceBits = EndOffset - FirstFieldOffset;
- uint64_t LongBits = Types.getTarget().getLongWidth();
- uint64_t WidenedBits = (StorageBits / LongBits) * LongBits +
- llvm::NextPowerOf2(StorageBits % LongBits - 1);
- assert(WidenedBits >= StorageBits && "Widening shrunk the bits!");
- if (WidenedBits <= SpaceBits) {
- StorageBits = WidenedBits;
- StorageBytes = Types.getContext().toCharUnitsFromBits(StorageBits);
- assert(StorageBits == (uint64_t)Types.getContext().toBits(StorageBytes));
- }
-
- unsigned FieldIndex = FieldTypes.size();
- AppendBytes(StorageBytes);
-
- // Now walk the bitfields associating them with this field of storage and
- // building up the bitfield specific info.
- unsigned FieldNo = FirstFieldNo;
- for (; BFI != BFE; ++BFI, ++FieldNo) {
- FieldDecl *FD = *BFI;
- uint64_t FieldOffset = Layout.getFieldOffset(FieldNo) - FirstFieldOffset;
- uint64_t FieldSize = FD->getBitWidthValue(Types.getContext());
- Fields[FD] = FieldIndex;
- BitFields[FD] = CGBitFieldInfo::MakeInfo(Types, FD, FieldOffset, FieldSize,
- StorageBits, StorageAlignment);
- }
- FirstFieldNo = LastFieldNo;
- return true;
-}
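
Review note on the deletion above: the widening step in the removed LayoutBitfields is the subtle part, so here is a minimal standalone sketch of that arithmetic under the same assumptions. The names and helpers below are illustrative, not Clang/LLVM API, and the remainder-zero case is handled explicitly rather than via the unsigned wraparound the original relied on.

#include <cassert>
#include <cstdint>

// Smallest power of two >= N, for N > 0. For a non-zero remainder this matches
// the effect of llvm::NextPowerOf2(N - 1) in the code being removed.
static uint64_t roundUpToPowerOf2(uint64_t N) {
  uint64_t P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

// Widen bit-field storage to whole 'long's plus a power-of-two tail, but only
// when the result still fits in the space before the next field (SpaceBits).
// E.g. widenStorage(24, 32, 48) == 32 and widenStorage(40, 32, 48) == 40.
static uint64_t widenStorage(uint64_t StorageBits, uint64_t LongBits,
                             uint64_t SpaceBits) {
  uint64_t Rem = StorageBits % LongBits;
  uint64_t Widened =
      (StorageBits / LongBits) * LongBits + (Rem ? roundUpToPowerOf2(Rem) : 0);
  assert(Widened >= StorageBits && "Widening shrunk the bits!");
  return Widened <= SpaceBits ? Widened : StorageBits;
}
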
-
-bool CGRecordLayoutBuilder::LayoutField(const FieldDecl *D,
- uint64_t fieldOffset) {
- // If the field is packed, then we need a packed struct.
- if (!Packed && D->hasAttr<PackedAttr>())
- return false;
-
- assert(!D->isBitField() && "Bitfields should be laid out seperately.");
-
- CheckZeroInitializable(D->getType());
-
- assert(fieldOffset % Types.getTarget().getCharWidth() == 0
- && "field offset is not on a byte boundary!");
- CharUnits fieldOffsetInBytes
- = Types.getContext().toCharUnitsFromBits(fieldOffset);
-
- llvm::Type *Ty = Types.ConvertTypeForMem(D->getType());
- CharUnits typeAlignment = getTypeAlignment(Ty);
-
- // If the type alignment is larger then the struct alignment, we must use
- // a packed struct.
- if (typeAlignment > Alignment) {
- assert(!Packed && "Alignment is wrong even with packed struct!");
- return false;
- }
-
- if (!Packed) {
- if (const RecordType *RT = D->getType()->getAs<RecordType>()) {
- const RecordDecl *RD = cast<RecordDecl>(RT->getDecl());
- if (const MaxFieldAlignmentAttr *MFAA =
- RD->getAttr<MaxFieldAlignmentAttr>()) {
- if (MFAA->getAlignment() != Types.getContext().toBits(typeAlignment))
- return false;
- }
- }
- }
-
- // Round up the field offset to the alignment of the field type.
- CharUnits alignedNextFieldOffsetInBytes =
- NextFieldOffset.RoundUpToAlignment(typeAlignment);
-
- if (fieldOffsetInBytes < alignedNextFieldOffsetInBytes) {
- // Try to resize the last base field.
- if (ResizeLastBaseFieldIfNecessary(fieldOffsetInBytes)) {
- alignedNextFieldOffsetInBytes =
- NextFieldOffset.RoundUpToAlignment(typeAlignment);
- }
- }
-
- if (fieldOffsetInBytes < alignedNextFieldOffsetInBytes) {
- assert(!Packed && "Could not place field even with packed struct!");
- return false;
- }
-
- AppendPadding(fieldOffsetInBytes, typeAlignment);
-
- // Now append the field.
- Fields[D] = FieldTypes.size();
- AppendField(fieldOffsetInBytes, Ty);
-
- LastLaidOutBase.invalidate();
- return true;
-}
-
-llvm::Type *
-CGRecordLayoutBuilder::LayoutUnionField(const FieldDecl *Field,
- const ASTRecordLayout &Layout) {
- Fields[Field] = 0;
- if (Field->isBitField()) {
- uint64_t FieldSize = Field->getBitWidthValue(Types.getContext());
-
- // Ignore zero sized bit fields.
- if (FieldSize == 0)
- return 0;
-
- unsigned StorageBits = llvm::RoundUpToAlignment(
- FieldSize, Types.getTarget().getCharAlign());
- CharUnits NumBytesToAppend
- = Types.getContext().toCharUnitsFromBits(StorageBits);
-
- llvm::Type *FieldTy = llvm::Type::getInt8Ty(Types.getLLVMContext());
- if (NumBytesToAppend > CharUnits::One())
- FieldTy = llvm::ArrayType::get(FieldTy, NumBytesToAppend.getQuantity());
-
- // Add the bit field info.
- BitFields[Field] = CGBitFieldInfo::MakeInfo(Types, Field, 0, FieldSize,
- StorageBits,
- Alignment.getQuantity());
- return FieldTy;
- }
-
- // This is a regular union field.
- return Types.ConvertTypeForMem(Field->getType());
-}
-
-void CGRecordLayoutBuilder::LayoutUnion(const RecordDecl *D) {
- assert(D->isUnion() && "Can't call LayoutUnion on a non-union record!");
-
- const ASTRecordLayout &layout = Types.getContext().getASTRecordLayout(D);
-
- llvm::Type *unionType = 0;
- CharUnits unionSize = CharUnits::Zero();
- CharUnits unionAlign = CharUnits::Zero();
-
- bool hasOnlyZeroSizedBitFields = true;
- bool checkedFirstFieldZeroInit = false;
-
- unsigned fieldNo = 0;
- for (RecordDecl::field_iterator field = D->field_begin(),
- fieldEnd = D->field_end(); field != fieldEnd; ++field, ++fieldNo) {
- assert(layout.getFieldOffset(fieldNo) == 0 &&
- "Union field offset did not start at the beginning of record!");
- llvm::Type *fieldType = LayoutUnionField(*field, layout);
-
- if (!fieldType)
- continue;
-
- if (field->getDeclName() && !checkedFirstFieldZeroInit) {
- CheckZeroInitializable(field->getType());
- checkedFirstFieldZeroInit = true;
- }
-
- hasOnlyZeroSizedBitFields = false;
-
- CharUnits fieldAlign = CharUnits::fromQuantity(
- Types.getDataLayout().getABITypeAlignment(fieldType));
- CharUnits fieldSize = CharUnits::fromQuantity(
- Types.getDataLayout().getTypeAllocSize(fieldType));
-
- if (fieldAlign < unionAlign)
- continue;
-
- if (fieldAlign > unionAlign || fieldSize > unionSize) {
- unionType = fieldType;
- unionAlign = fieldAlign;
- unionSize = fieldSize;
- }
- }
-
- // Now add our field.
- if (unionType) {
- AppendField(CharUnits::Zero(), unionType);
-
- if (getTypeAlignment(unionType) > layout.getAlignment()) {
- // We need a packed struct.
- Packed = true;
- unionAlign = CharUnits::One();
- }
- }
- if (unionAlign.isZero()) {
- (void)hasOnlyZeroSizedBitFields;
- assert(hasOnlyZeroSizedBitFields &&
- "0-align record did not have all zero-sized bit-fields!");
- unionAlign = CharUnits::One();
- }
-
- // Append tail padding.
- CharUnits recordSize = layout.getSize();
- if (recordSize > unionSize)
- AppendPadding(recordSize, unionAlign);
-}
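
The member-selection rule in the removed LayoutUnion (most-aligned member wins, ties broken by size, with the record padded out to the full union size afterwards) is easy to misread, so here is a small standalone restatement with illustrative types rather than Clang/LLVM API.

#include <cstdint>
#include <vector>

// Illustrative stand-in for the size/alignment of a union member's LLVM type.
struct MemberInfo {
  uint64_t Size;
  uint64_t Align;
};

// Pick the storage member the way the removed LayoutUnion does: prefer the
// most-aligned member, and among equally aligned members the largest one.
// Returns -1 when every member is a zero-sized bit-field; the caller then
// falls back to a char-aligned, padding-only layout.
static int pickUnionStorage(const std::vector<MemberInfo> &Members) {
  int Best = -1;
  uint64_t BestSize = 0, BestAlign = 0;
  for (int I = 0, E = static_cast<int>(Members.size()); I != E; ++I) {
    if (Members[I].Align < BestAlign)
      continue;
    if (Members[I].Align > BestAlign || Members[I].Size > BestSize) {
      Best = I;
      BestAlign = Members[I].Align;
      BestSize = Members[I].Size;
    }
  }
  return Best; // e.g. {{4,4},{8,4},{2,2}} selects index 1 (largest at max align)
}
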
-
-bool CGRecordLayoutBuilder::LayoutBase(const CXXRecordDecl *base,
- const CGRecordLayout &baseLayout,
- CharUnits baseOffset) {
- ResizeLastBaseFieldIfNecessary(baseOffset);
-
- AppendPadding(baseOffset, CharUnits::One());
-
- const ASTRecordLayout &baseASTLayout
- = Types.getContext().getASTRecordLayout(base);
-
- LastLaidOutBase.Offset = NextFieldOffset;
- LastLaidOutBase.NonVirtualSize = baseASTLayout.getNonVirtualSize();
-
- llvm::StructType *subobjectType = baseLayout.getBaseSubobjectLLVMType();
- if (getTypeAlignment(subobjectType) > Alignment)
- return false;
-
- AppendField(baseOffset, subobjectType);
- return true;
-}
-
-bool CGRecordLayoutBuilder::LayoutNonVirtualBase(const CXXRecordDecl *base,
- CharUnits baseOffset) {
- // Ignore empty bases.
- if (base->isEmpty()) return true;
-
- const CGRecordLayout &baseLayout = Types.getCGRecordLayout(base);
- if (IsZeroInitializableAsBase) {
- assert(IsZeroInitializable &&
- "class zero-initializable as base but not as complete object");
-
- IsZeroInitializable = IsZeroInitializableAsBase =
- baseLayout.isZeroInitializableAsBase();
- }
-
- if (!LayoutBase(base, baseLayout, baseOffset))
- return false;
- NonVirtualBases[base] = (FieldTypes.size() - 1);
- return true;
-}
-
-bool
-CGRecordLayoutBuilder::LayoutVirtualBase(const CXXRecordDecl *base,
- CharUnits baseOffset) {
- // Ignore empty bases.
- if (base->isEmpty()) return true;
-
- const CGRecordLayout &baseLayout = Types.getCGRecordLayout(base);
- if (IsZeroInitializable)
- IsZeroInitializable = baseLayout.isZeroInitializableAsBase();
-
- if (!LayoutBase(base, baseLayout, baseOffset))
- return false;
- VirtualBases[base] = (FieldTypes.size() - 1);
- return true;
-}
-
-bool
-CGRecordLayoutBuilder::MSLayoutVirtualBases(const CXXRecordDecl *RD,
- const ASTRecordLayout &Layout) {
- if (!RD->getNumVBases())
- return true;
-
- // The vbases list is uniqued and ordered by a depth-first
- // traversal, which is what we need here.
- for (CXXRecordDecl::base_class_const_iterator I = RD->vbases_begin(),
- E = RD->vbases_end(); I != E; ++I) {
-
- const CXXRecordDecl *BaseDecl =
- cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl());
-
- CharUnits vbaseOffset = Layout.getVBaseClassOffset(BaseDecl);
- if (!LayoutVirtualBase(BaseDecl, vbaseOffset))
- return false;
- }
- return true;
-}
-
-/// LayoutVirtualBases - layout the non-virtual bases of a record decl.
-bool
-CGRecordLayoutBuilder::LayoutVirtualBases(const CXXRecordDecl *RD,
- const ASTRecordLayout &Layout) {
- for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(),
- E = RD->bases_end(); I != E; ++I) {
- const CXXRecordDecl *BaseDecl =
- cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl());
-
- // We only want to lay out virtual bases that aren't indirect primary bases
- // of some other base.
- if (I->isVirtual() && !IndirectPrimaryBases.count(BaseDecl)) {
- // Only lay out the base once.
- if (!LaidOutVirtualBases.insert(BaseDecl))
- continue;
-
- CharUnits vbaseOffset = Layout.getVBaseClassOffset(BaseDecl);
- if (!LayoutVirtualBase(BaseDecl, vbaseOffset))
- return false;
- }
-
- if (!BaseDecl->getNumVBases()) {
- // This base isn't interesting since it doesn't have any virtual bases.
- continue;
- }
-
- if (!LayoutVirtualBases(BaseDecl, Layout))
- return false;
- }
- return true;
-}
-
-bool
-CGRecordLayoutBuilder::LayoutNonVirtualBases(const CXXRecordDecl *RD,
- const ASTRecordLayout &Layout) {
- const CXXRecordDecl *PrimaryBase = Layout.getPrimaryBase();
-
- // If we have a primary base, lay it out first.
- if (PrimaryBase) {
- if (!Layout.isPrimaryBaseVirtual()) {
- if (!LayoutNonVirtualBase(PrimaryBase, CharUnits::Zero()))
- return false;
- } else {
- if (!LayoutVirtualBase(PrimaryBase, CharUnits::Zero()))
- return false;
- }
-
- // Otherwise, add a vtable / vf-table if the layout says to do so.
- } else if (Layout.hasOwnVFPtr()) {
- llvm::Type *FunctionType =
- llvm::FunctionType::get(llvm::Type::getInt32Ty(Types.getLLVMContext()),
- /*isVarArg=*/true);
- llvm::Type *VTableTy = FunctionType->getPointerTo();
-
- if (getTypeAlignment(VTableTy) > Alignment) {
- // FIXME: Should we allow this to happen in Sema?
- assert(!Packed && "Alignment is wrong even with packed struct!");
- return false;
- }
-
- assert(NextFieldOffset.isZero() &&
- "VTable pointer must come first!");
- AppendField(CharUnits::Zero(), VTableTy->getPointerTo());
- }
-
- // Layout the non-virtual bases.
- for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(),
- E = RD->bases_end(); I != E; ++I) {
- if (I->isVirtual())
- continue;
-
- const CXXRecordDecl *BaseDecl =
- cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl());
-
- // We've already laid out the primary base.
- if (BaseDecl == PrimaryBase && !Layout.isPrimaryBaseVirtual())
- continue;
-
- if (!LayoutNonVirtualBase(BaseDecl, Layout.getBaseClassOffset(BaseDecl)))
- return false;
- }
-
- // Add a vb-table pointer if the layout insists.
- if (Layout.hasOwnVBPtr()) {
- CharUnits VBPtrOffset = Layout.getVBPtrOffset();
- llvm::Type *Vbptr = llvm::Type::getInt32PtrTy(Types.getLLVMContext());
- AppendPadding(VBPtrOffset, getTypeAlignment(Vbptr));
- AppendField(VBPtrOffset, Vbptr);
- }
-
- return true;
-}
-
-bool
-CGRecordLayoutBuilder::ComputeNonVirtualBaseType(const CXXRecordDecl *RD) {
- const ASTRecordLayout &Layout = Types.getContext().getASTRecordLayout(RD);
-
- CharUnits NonVirtualSize = Layout.getNonVirtualSize();
- CharUnits NonVirtualAlign = Layout.getNonVirtualAlign();
- CharUnits AlignedNonVirtualTypeSize =
- NonVirtualSize.RoundUpToAlignment(NonVirtualAlign);
-
- // First check if we can use the same fields as for the complete class.
- CharUnits RecordSize = Layout.getSize();
- if (AlignedNonVirtualTypeSize == RecordSize)
- return true;
-
- // Check if we need padding.
- CharUnits AlignedNextFieldOffset =
- NextFieldOffset.RoundUpToAlignment(getAlignmentAsLLVMStruct());
-
- if (AlignedNextFieldOffset > AlignedNonVirtualTypeSize) {
- assert(!Packed && "cannot layout even as packed struct");
- return false; // Needs packing.
- }
-
- bool needsPadding = (AlignedNonVirtualTypeSize != AlignedNextFieldOffset);
- if (needsPadding) {
- CharUnits NumBytes = AlignedNonVirtualTypeSize - AlignedNextFieldOffset;
- FieldTypes.push_back(getByteArrayType(NumBytes));
- }
-
- BaseSubobjectType = llvm::StructType::create(Types.getLLVMContext(),
- FieldTypes, "", Packed);
- Types.addRecordTypeName(RD, BaseSubobjectType, ".base");
-
- // Pull the padding back off.
- if (needsPadding)
- FieldTypes.pop_back();
-
- return true;
-}
-
-bool CGRecordLayoutBuilder::LayoutFields(const RecordDecl *D) {
- assert(!D->isUnion() && "Can't call LayoutFields on a union!");
- assert(!Alignment.isZero() && "Did not set alignment!");
-
- const ASTRecordLayout &Layout = Types.getContext().getASTRecordLayout(D);
-
- const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D);
- if (RD)
- if (!LayoutNonVirtualBases(RD, Layout))
- return false;
-
- unsigned FieldNo = 0;
-
- for (RecordDecl::field_iterator FI = D->field_begin(), FE = D->field_end();
- FI != FE; ++FI, ++FieldNo) {
- FieldDecl *FD = *FI;
-
- // If this field is a bitfield, layout all of the consecutive
- // non-zero-length bitfields and the last zero-length bitfield; these will
- // all share storage.
- if (FD->isBitField()) {
- // If all we have is a zero-width bitfield, skip it.
- if (FD->getBitWidthValue(Types.getContext()) == 0)
- continue;
-
- // Layout this range of bitfields.
- if (!LayoutBitfields(Layout, FieldNo, FI, FE)) {
- assert(!Packed &&
- "Could not layout bitfields even with a packed LLVM struct!");
- return false;
- }
- assert(FI != FE && "Advanced past the last bitfield");
- continue;
- }
-
- if (!LayoutField(FD, Layout.getFieldOffset(FieldNo))) {
- assert(!Packed &&
- "Could not layout fields even with a packed LLVM struct!");
- return false;
- }
- }
-
- if (RD) {
- // We've laid out the non-virtual bases and the fields, now compute the
- // non-virtual base field types.
- if (!ComputeNonVirtualBaseType(RD)) {
- assert(!Packed && "Could not layout even with a packed LLVM struct!");
- return false;
- }
-
- // Lay out the virtual bases. The MS ABI uses a different
- // algorithm here due to the lack of primary virtual bases.
- if (Types.getTarget().getCXXABI().hasPrimaryVBases()) {
- RD->getIndirectPrimaryBases(IndirectPrimaryBases);
- if (Layout.isPrimaryBaseVirtual())
- IndirectPrimaryBases.insert(Layout.getPrimaryBase());
-
- if (!LayoutVirtualBases(RD, Layout))
- return false;
- } else {
- if (!MSLayoutVirtualBases(RD, Layout))
- return false;
- }
- }
-
- // Append tail padding if necessary.
- AppendTailPadding(Layout.getSize());
-
- return true;
-}
-
-void CGRecordLayoutBuilder::AppendTailPadding(CharUnits RecordSize) {
- ResizeLastBaseFieldIfNecessary(RecordSize);
-
- assert(NextFieldOffset <= RecordSize && "Size mismatch!");
-
- CharUnits AlignedNextFieldOffset =
- NextFieldOffset.RoundUpToAlignment(getAlignmentAsLLVMStruct());
-
- if (AlignedNextFieldOffset == RecordSize) {
- // We don't need any padding.
- return;
- }
-
- CharUnits NumPadBytes = RecordSize - NextFieldOffset;
- AppendBytes(NumPadBytes);
-}
-
-void CGRecordLayoutBuilder::AppendField(CharUnits fieldOffset,
- llvm::Type *fieldType) {
- CharUnits fieldSize =
- CharUnits::fromQuantity(Types.getDataLayout().getTypeAllocSize(fieldType));
-
- FieldTypes.push_back(fieldType);
-
- NextFieldOffset = fieldOffset + fieldSize;
-}
-
-void CGRecordLayoutBuilder::AppendPadding(CharUnits fieldOffset,
- CharUnits fieldAlignment) {
- assert(NextFieldOffset <= fieldOffset &&
- "Incorrect field layout!");
-
- // Do nothing if we're already at the right offset.
- if (fieldOffset == NextFieldOffset) return;
-
- // If we're not emitting a packed LLVM type, try to avoid adding
- // unnecessary padding fields.
- if (!Packed) {
- // Round up the field offset to the alignment of the field type.
- CharUnits alignedNextFieldOffset =
- NextFieldOffset.RoundUpToAlignment(fieldAlignment);
- assert(alignedNextFieldOffset <= fieldOffset);
-
- // If that's the right offset, we're done.
- if (alignedNextFieldOffset == fieldOffset) return;
- }
-
- // Otherwise we need explicit padding.
- CharUnits padding = fieldOffset - NextFieldOffset;
- AppendBytes(padding);
-}
-
-bool CGRecordLayoutBuilder::ResizeLastBaseFieldIfNecessary(CharUnits offset) {
- // Check if we have a base to resize.
- if (!LastLaidOutBase.isValid())
- return false;
-
- // This offset does not overlap with the tail padding.
- if (offset >= NextFieldOffset)
- return false;
-
- // Restore the field offset and append an i8 array instead.
- FieldTypes.pop_back();
- NextFieldOffset = LastLaidOutBase.Offset;
- AppendBytes(LastLaidOutBase.NonVirtualSize);
- LastLaidOutBase.invalidate();
-
- return true;
-}
-
-llvm::Type *CGRecordLayoutBuilder::getByteArrayType(CharUnits numBytes) {
- assert(!numBytes.isZero() && "Empty byte arrays aren't allowed.");
-
- llvm::Type *Ty = llvm::Type::getInt8Ty(Types.getLLVMContext());
- if (numBytes > CharUnits::One())
- Ty = llvm::ArrayType::get(Ty, numBytes.getQuantity());
-
- return Ty;
-}
-
-void CGRecordLayoutBuilder::AppendBytes(CharUnits numBytes) {
- if (numBytes.isZero())
- return;
-
- // Append the padding field
- AppendField(NextFieldOffset, getByteArrayType(numBytes));
-}
-
-CharUnits CGRecordLayoutBuilder::getTypeAlignment(llvm::Type *Ty) const {
- if (Packed)
- return CharUnits::One();
-
- return CharUnits::fromQuantity(Types.getDataLayout().getABITypeAlignment(Ty));
-}
-
-CharUnits CGRecordLayoutBuilder::getAlignmentAsLLVMStruct() const {
- if (Packed)
- return CharUnits::One();
-
- CharUnits maxAlignment = CharUnits::One();
- for (size_t i = 0; i != FieldTypes.size(); ++i)
- maxAlignment = std::max(maxAlignment, getTypeAlignment(FieldTypes[i]));
-
- return maxAlignment;
-}
-
-/// Merge in whether a field of the given type is zero-initializable.
-void CGRecordLayoutBuilder::CheckZeroInitializable(QualType T) {
- // This record already contains a member pointer.
- if (!IsZeroInitializableAsBase)
- return;
-
- // Can only have member pointers if we're compiling C++.
- if (!Types.getContext().getLangOpts().CPlusPlus)
- return;
-
- const Type *elementType = T->getBaseElementTypeUnsafe();
-
- if (const MemberPointerType *MPT = elementType->getAs<MemberPointerType>()) {
- if (!Types.getCXXABI().isZeroInitializable(MPT))
- IsZeroInitializable = IsZeroInitializableAsBase = false;
- } else if (const RecordType *RT = elementType->getAs<RecordType>()) {
- const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
- const CGRecordLayout &Layout = Types.getCGRecordLayout(RD);
- if (!Layout.isZeroInitializable())
- IsZeroInitializable = IsZeroInitializableAsBase = false;
- }
-}
-
CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D,
llvm::StructType *Ty) {
- CGRecordLayoutBuilder Builder(*this);
+ CGRecordLowering Builder(*this, D);
- Builder.Layout(D);
-
- Ty->setBody(Builder.FieldTypes, Builder.Packed);
+ Builder.lower(false);
// If we're in C++, compute the base subobject type.
llvm::StructType *BaseTy = 0;
- if (isa<CXXRecordDecl>(D) && !D->isUnion()) {
- BaseTy = Builder.BaseSubobjectType;
- if (!BaseTy) BaseTy = Ty;
+ if (isa<CXXRecordDecl>(D) && !D->isUnion() && !D->hasAttr<FinalAttr>()) {
+ BaseTy = Ty;
+ if (Builder.Layout.getNonVirtualSize() != Builder.Layout.getSize()) {
+ CGRecordLowering BaseBuilder(*this, D);
+ BaseBuilder.lower(true);
+ BaseTy = llvm::StructType::create(
+ getLLVMContext(), BaseBuilder.FieldTypes, "", BaseBuilder.Packed);
+ addRecordTypeName(D, BaseTy, ".base");
+ }
}
+ // Fill in the struct *after* computing the base type. Filling in the body
+ // signifies that the type is no longer opaque and record layout is complete,
+ // but we may need to recursively lay out D while laying D out as a base type.
+ Ty->setBody(Builder.FieldTypes, Builder.Packed);
+
CGRecordLayout *RL =
new CGRecordLayout(Ty, BaseTy, Builder.IsZeroInitializable,
- Builder.IsZeroInitializableAsBase);
+ Builder.IsZeroInitializableAsBase);
RL->NonVirtualBases.swap(Builder.NonVirtualBases);
RL->CompleteObjectVirtualBases.swap(Builder.VirtualBases);
@@ -994,12 +676,9 @@
if (BaseTy) {
CharUnits NonVirtualSize = Layout.getNonVirtualSize();
- CharUnits NonVirtualAlign = Layout.getNonVirtualAlign();
- CharUnits AlignedNonVirtualTypeSize =
- NonVirtualSize.RoundUpToAlignment(NonVirtualAlign);
uint64_t AlignedNonVirtualTypeSizeInBits =
- getContext().toBits(AlignedNonVirtualTypeSize);
+ getContext().toBits(NonVirtualSize);
assert(AlignedNonVirtualTypeSizeInBits ==
getDataLayout().getTypeAllocSizeInBits(BaseTy) &&
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 0bc51dd..cba9e6c 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -16,14 +16,14 @@
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/StmtVisitor.h"
-#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/CallSite.h"
using namespace clang;
using namespace CodeGen;
@@ -43,6 +43,7 @@
void CodeGenFunction::EmitStmt(const Stmt *S) {
assert(S && "Null statement?");
+ PGO.setCurrentStmt(S);
// These statements have their own debug info handling.
if (EmitSimpleStmt(S))
@@ -76,6 +77,7 @@
case Stmt::SEHFinallyStmtClass:
case Stmt::MSDependentExistsStmtClass:
case Stmt::OMPParallelDirectiveClass:
+ case Stmt::OMPSimdDirectiveClass:
llvm_unreachable("invalid statement class to emit generically");
case Stmt::NullStmtClass:
case Stmt::CompoundStmtClass:
@@ -242,7 +244,7 @@
EmitAnyExprToMem(cast<Expr>(LastStmt), RetAlloca, Qualifiers(),
/*IsInit*/false);
}
-
+
}
return RetAlloca;
@@ -309,9 +311,8 @@
void CodeGenFunction::EmitBlockAfterUses(llvm::BasicBlock *block) {
bool inserted = false;
- for (llvm::BasicBlock::use_iterator
- i = block->use_begin(), e = block->use_end(); i != e; ++i) {
- if (llvm::Instruction *insn = dyn_cast<llvm::Instruction>(*i)) {
+ for (llvm::User *u : block->users()) {
+ if (llvm::Instruction *insn = dyn_cast<llvm::Instruction>(u)) {
CurFn->getBasicBlockList().insertAfter(insn->getParent(), block);
inserted = true;
break;
@@ -358,7 +359,9 @@
ResolveBranchFixups(Dest.getBlock());
}
+ RegionCounter Cnt = getPGORegionCounter(D->getStmt());
EmitBlock(Dest.getBlock());
+ Cnt.beginRegion(Builder);
}
/// Change the cleanup scope of the labels in this lexical scope to
@@ -430,6 +433,7 @@
// C99 6.8.4.1: The first substatement is executed if the expression compares
// unequal to 0. The condition must be a scalar type.
LexicalScope ConditionScope(*this, S.getSourceRange());
+ RegionCounter Cnt = getPGORegionCounter(&S);
if (S.getConditionVariable())
EmitAutoVarDecl(*S.getConditionVariable());
@@ -447,6 +451,8 @@
// If the skipped block has no labels in it, just emit the executed block.
// This avoids emitting dead code and simplifies the CFG substantially.
if (!ContainsLabel(Skipped)) {
+ if (CondConstant)
+ Cnt.beginRegion(Builder);
if (Executed) {
RunCleanupsScope ExecutedScope(*this);
EmitStmt(Executed);
@@ -462,10 +468,12 @@
llvm::BasicBlock *ElseBlock = ContBlock;
if (S.getElse())
ElseBlock = createBasicBlock("if.else");
- EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock);
+
+ EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Cnt.getCount());
// Emit the 'then' code.
- EmitBlock(ThenBlock);
+ EmitBlock(ThenBlock);
+ Cnt.beginRegion(Builder);
{
RunCleanupsScope ThenScope(*this);
EmitStmt(S.getThen());
@@ -493,6 +501,8 @@
}
void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
+ RegionCounter Cnt = getPGORegionCounter(&S);
+
// Emit the header for the loop, which will also become
// the continue target.
JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
@@ -535,8 +545,8 @@
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
if (ConditionScope.requiresCleanups())
ExitBlock = createBasicBlock("while.exit");
-
- Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
+ Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
+ PGO.createLoopWeights(S.getCond(), Cnt));
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@@ -549,6 +559,7 @@
{
RunCleanupsScope BodyScope(*this);
EmitBlock(LoopBody);
+ Cnt.beginRegion(Builder);
EmitStmt(S.getBody());
}
@@ -573,19 +584,19 @@
JumpDest LoopExit = getJumpDestInCurrentScope("do.end");
JumpDest LoopCond = getJumpDestInCurrentScope("do.cond");
+ RegionCounter Cnt = getPGORegionCounter(&S);
+
// Store the blocks to use for break and continue.
BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond));
// Emit the body of the loop.
llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
- EmitBlock(LoopBody);
+ EmitBlockWithFallThrough(LoopBody, Cnt);
{
RunCleanupsScope BodyScope(*this);
EmitStmt(S.getBody());
}
- BreakContinueStack.pop_back();
-
EmitBlock(LoopCond.getBlock());
// C99 6.8.5.2: "The evaluation of the controlling expression takes place
@@ -596,6 +607,8 @@
// compares unequal to 0. The condition must be a scalar type.
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
+ BreakContinueStack.pop_back();
+
// "do {} while (0)" is common in macros, avoid extra blocks. Be sure
// to correctly handle break/continue though.
bool EmitBoolCondBranch = true;
@@ -605,7 +618,8 @@
// As long as the condition is true, iterate the loop.
if (EmitBoolCondBranch)
- Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock());
+ Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(),
+ PGO.createLoopWeights(S.getCond(), Cnt));
// Emit the exit block.
EmitBlock(LoopExit.getBlock());
@@ -629,6 +643,8 @@
if (S.getInit())
EmitStmt(S.getInit());
+ RegionCounter Cnt = getPGORegionCounter(&S);
+
// Start the loop with a block that tests the condition.
// If there's an increment, the continue scope will be overwritten
// later.
@@ -636,6 +652,16 @@
llvm::BasicBlock *CondBlock = Continue.getBlock();
EmitBlock(CondBlock);
+ // If the for loop doesn't have an increment we can just use the
+ // condition as the continue block. Otherwise we'll need to create
+ // a block for it (in the current scope, i.e. in the scope of the
+ // condition), and that will become our continue block.
+ if (S.getInc())
+ Continue = getJumpDestInCurrentScope("for.inc");
+
+ // Store the blocks to use for break and continue.
+ BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
+
// Create a cleanup scope for the condition variable cleanups.
RunCleanupsScope ConditionScope(*this);
@@ -657,7 +683,9 @@
// C99 6.8.5p2/p4: The first substatement is executed if the expression
// compares unequal to 0. The condition must be a scalar type.
- EmitBranchOnBoolExpr(S.getCond(), ForBody, ExitBlock);
+ llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
+ Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock,
+ PGO.createLoopWeights(S.getCond(), Cnt));
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@@ -669,16 +697,7 @@
// Treat it as a non-zero constant. Don't even create a new block for the
// body, just fall into it.
}
-
- // If the for loop doesn't have an increment we can just use the
- // condition as the continue block. Otherwise we'll need to create
- // a block for it (in the current scope, i.e. in the scope of the
- // condition), and that we will become our continue block.
- if (S.getInc())
- Continue = getJumpDestInCurrentScope("for.inc");
-
- // Store the blocks to use for break and continue.
- BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
+ Cnt.beginRegion(Builder);
{
// Create a separate cleanup scope for the body, in case it is not
@@ -720,6 +739,8 @@
EmitStmt(S.getRangeStmt());
EmitStmt(S.getBeginEndStmt());
+ RegionCounter Cnt = getPGORegionCounter(&S);
+
// Start the loop with a block that tests the condition.
// If there's an increment, the continue scope will be overwritten
// later.
@@ -737,7 +758,9 @@
// The body is executed if the expression, contextually converted
// to bool, is true.
- EmitBranchOnBoolExpr(S.getCond(), ForBody, ExitBlock);
+ llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
+ Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock,
+ PGO.createLoopWeights(S.getCond(), Cnt));
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@@ -745,6 +768,7 @@
}
EmitBlock(ForBody);
+ Cnt.beginRegion(Builder);
// Create a block for the increment. In case of a 'continue', we jump there.
JumpDest Continue = getJumpDestInCurrentScope("for.inc");
@@ -809,7 +833,8 @@
// FIXME: Clean this up by using an LValue for ReturnTemp,
// EmitStoreThroughLValue, and EmitAnyExpr.
- if (S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) {
+ if (getLangOpts().ElideConstructors &&
+ S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) {
// Apply the named return value optimization for this return statement,
// which means doing nothing: the appropriate result has already been
// constructed into the NRVO variable.
@@ -818,7 +843,7 @@
// that the cleanup code should not destroy the variable.
if (llvm::Value *NRVOFlag = NRVOFlags[S.getNRVOCandidate()])
Builder.CreateStore(Builder.getTrue(), NRVOFlag);
- } else if (!ReturnValue) {
+ } else if (!ReturnValue || (RV && RV->getType()->isVoidType())) {
// Make sure not to return anything, but evaluate the expression
// for side effects.
if (RV)
@@ -866,9 +891,8 @@
if (HaveInsertPoint())
EmitStopPoint(&S);
- for (DeclStmt::const_decl_iterator I = S.decl_begin(), E = S.decl_end();
- I != E; ++I)
- EmitDecl(**I);
+ for (const auto *I : S.decls())
+ EmitDecl(*I);
}
void CodeGenFunction::EmitBreakStmt(const BreakStmt &S) {
@@ -880,8 +904,7 @@
if (HaveInsertPoint())
EmitStopPoint(&S);
- JumpDest Block = BreakContinueStack.back().BreakBlock;
- EmitBranchThroughCleanup(Block);
+ EmitBranchThroughCleanup(BreakContinueStack.back().BreakBlock);
}
void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
@@ -893,8 +916,7 @@
if (HaveInsertPoint())
EmitStopPoint(&S);
- JumpDest Block = BreakContinueStack.back().ContinueBlock;
- EmitBranchThroughCleanup(Block);
+ EmitBranchThroughCleanup(BreakContinueStack.back().ContinueBlock);
}
/// EmitCaseStmtRange - If case statement range is not too big then
@@ -906,11 +928,13 @@
llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext());
+ RegionCounter CaseCnt = getPGORegionCounter(&S);
+
// Emit the code for this case. We do this first to make sure it is
// properly chained from our predecessor before generating the
// switch machinery to enter this block.
- EmitBlock(createBasicBlock("sw.bb"));
- llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
+ llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb");
+ EmitBlockWithFallThrough(CaseDest, CaseCnt);
EmitStmt(S.getSubStmt());
// If range is empty, do nothing.
@@ -921,7 +945,18 @@
// FIXME: parameters such as this should not be hardcoded.
if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
// Range is small enough to add multiple switch instruction cases.
- for (unsigned i = 0, e = Range.getZExtValue() + 1; i != e; ++i) {
+ uint64_t Total = CaseCnt.getCount();
+ unsigned NCases = Range.getZExtValue() + 1;
+ // We only have one region counter for the entire set of cases here, so we
+ // need to divide the weights evenly between the generated cases, ensuring
+ // that the total weight is preserved. E.g., a weight of 5 over three cases
+ // will be distributed as weights of 2, 2, and 1.
+ uint64_t Weight = Total / NCases, Rem = Total % NCases;
+ for (unsigned I = 0; I != NCases; ++I) {
+ if (SwitchWeights)
+ SwitchWeights->push_back(Weight + (Rem ? 1 : 0));
+ if (Rem)
+ Rem--;
SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
LHS++;
}
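
The even split described in the comment above ("a weight of 5 over three cases will be distributed as weights of 2, 2, and 1") is just integer division with the remainder spread over the leading cases. A standalone restatement of that loop, with illustrative names:

#include <cstdint>
#include <vector>

// Split one region count across NCases generated cases while preserving the
// total, matching the loop added above: distributeCaseWeights(5, 3) == {2, 2, 1}.
static std::vector<uint64_t> distributeCaseWeights(uint64_t Total,
                                                   unsigned NCases) {
  std::vector<uint64_t> Weights;
  uint64_t Weight = Total / NCases, Rem = Total % NCases;
  for (unsigned I = 0; I != NCases; ++I) {
    Weights.push_back(Weight + (Rem ? 1 : 0));
    if (Rem)
      --Rem;
  }
  return Weights;
}
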
@@ -946,7 +981,19 @@
Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS));
llvm::Value *Cond =
Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds");
- Builder.CreateCondBr(Cond, CaseDest, FalseDest);
+
+ llvm::MDNode *Weights = 0;
+ if (SwitchWeights) {
+ uint64_t ThisCount = CaseCnt.getCount();
+ uint64_t DefaultCount = (*SwitchWeights)[0];
+ Weights = PGO.createBranchWeights(ThisCount, DefaultCount);
+
+ // Since we're chaining the switch default through each large case range, we
+ // need to update the weight for the default, i.e., the first case, to include
+ // this case.
+ (*SwitchWeights)[0] += ThisCount;
+ }
+ Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights);
// Restore the appropriate insertion point.
if (RestoreBB)
@@ -959,7 +1006,7 @@
// If there is no enclosing switch instance that we're aware of, then this
// case statement and its block can be elided. This situation only happens
// when we've constant-folded the switch, are emitting the constant case,
- // and part of the constant case includes another case statement. For
+ // and part of the constant case includes another case statement. For
// instance: switch (4) { case 4: do { case 5: } while (1); }
if (!SwitchInsn) {
EmitStmt(S.getSubStmt());
@@ -972,17 +1019,22 @@
return;
}
+ RegionCounter CaseCnt = getPGORegionCounter(&S);
llvm::ConstantInt *CaseVal =
Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
- // If the body of the case is just a 'break', and if there was no fallthrough,
- // try to not emit an empty block.
- if ((CGM.getCodeGenOpts().OptimizationLevel > 0) &&
+ // If the body of the case is just a 'break', try to not emit an empty block.
+ // If we're profiling or we're not optimizing, leave the block in for better
+ // debug and coverage analysis.
+ if (!CGM.getCodeGenOpts().ProfileInstrGenerate &&
+ CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isa<BreakStmt>(S.getSubStmt())) {
JumpDest Block = BreakContinueStack.back().BreakBlock;
// Only do this optimization if there are no cleanups that need emitting.
if (isObviouslyBranchWithoutCleanups(Block)) {
+ if (SwitchWeights)
+ SwitchWeights->push_back(CaseCnt.getCount());
SwitchInsn->addCase(CaseVal, Block.getBlock());
// If there was a fallthrough into this case, make sure to redirect it to
@@ -995,8 +1047,10 @@
}
}
- EmitBlock(createBasicBlock("sw.bb"));
- llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
+ llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb");
+ EmitBlockWithFallThrough(CaseDest, CaseCnt);
+ if (SwitchWeights)
+ SwitchWeights->push_back(CaseCnt.getCount());
SwitchInsn->addCase(CaseVal, CaseDest);
// Recursively emitting the statement is acceptable, but is not wonderful for
@@ -1014,8 +1068,17 @@
// Otherwise, iteratively add consecutive cases to this switch stmt.
while (NextCase && NextCase->getRHS() == 0) {
CurCase = NextCase;
- llvm::ConstantInt *CaseVal =
+ llvm::ConstantInt *CaseVal =
Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));
+
+ CaseCnt = getPGORegionCounter(NextCase);
+ if (SwitchWeights)
+ SwitchWeights->push_back(CaseCnt.getCount());
+ if (CGM.getCodeGenOpts().ProfileInstrGenerate) {
+ CaseDest = createBasicBlock("sw.bb");
+ EmitBlockWithFallThrough(CaseDest, CaseCnt);
+ }
+
SwitchInsn->addCase(CaseVal, CaseDest);
NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
}
@@ -1028,7 +1091,10 @@
llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest();
assert(DefaultBlock->empty() &&
"EmitDefaultStmt: Default block already defined?");
- EmitBlock(DefaultBlock);
+
+ RegionCounter Cnt = getPGORegionCounter(&S);
+ EmitBlockWithFallThrough(DefaultBlock, Cnt);
+
EmitStmt(S.getSubStmt());
}
@@ -1185,7 +1251,8 @@
static bool FindCaseStatementsForValue(const SwitchStmt &S,
const llvm::APSInt &ConstantCondValue,
SmallVectorImpl<const Stmt*> &ResultStmts,
- ASTContext &C) {
+ ASTContext &C,
+ const SwitchCase *&ResultCase) {
// First step, find the switch case that is being branched to. We can do this
// efficiently by scanning the SwitchCase list.
const SwitchCase *Case = S.getSwitchCaseList();
@@ -1228,6 +1295,7 @@
// while (1) {
// case 4: ...
bool FoundCase = false;
+ ResultCase = Case;
return CollectStatementsForCase(S.getBody(), Case, FoundCase,
ResultStmts) != CSFC_Failure &&
FoundCase;
@@ -1243,6 +1311,7 @@
// Handle nested switch statements.
llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
+ SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
// See if we can constant fold the condition of the switch and therefore only
@@ -1250,8 +1319,13 @@
llvm::APSInt ConstantCondValue;
if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) {
SmallVector<const Stmt*, 4> CaseStmts;
+ const SwitchCase *Case = 0;
if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
- getContext())) {
+ getContext(), Case)) {
+ if (Case) {
+ RegionCounter CaseCnt = getPGORegionCounter(Case);
+ CaseCnt.beginRegion(Builder);
+ }
RunCleanupsScope ExecutedScope(*this);
// At this point, we are no longer "within" a switch instance, so
@@ -1263,6 +1337,8 @@
// specified series of statements and we're good.
for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i)
EmitStmt(CaseStmts[i]);
+ RegionCounter ExitCnt = getPGORegionCounter(&S);
+ ExitCnt.beginRegion(Builder);
// Now we want to restore the saved switch instance so that nested
// switches continue to function properly
@@ -1280,12 +1356,29 @@
// failure.
llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default");
SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock);
+ if (PGO.haveRegionCounts()) {
+ // Walk the SwitchCase list to find how many there are.
+ uint64_t DefaultCount = 0;
+ unsigned NumCases = 0;
+ for (const SwitchCase *Case = S.getSwitchCaseList();
+ Case;
+ Case = Case->getNextSwitchCase()) {
+ if (isa<DefaultStmt>(Case))
+ DefaultCount = getPGORegionCounter(Case).getCount();
+ NumCases += 1;
+ }
+ SwitchWeights = new SmallVector<uint64_t, 16>();
+ SwitchWeights->reserve(NumCases);
+ // The default needs to be first. We store the edge count, so we already
+ // know the right weight.
+ SwitchWeights->push_back(DefaultCount);
+ }
CaseRangeBlock = DefaultBlock;
// Clear the insertion point to indicate we are in unreachable code.
Builder.ClearInsertionPoint();
- // All break statements jump to NextBlock. If BreakContinueStack is non empty
+ // All break statements jump to NextBlock. If BreakContinueStack is non-empty
// then reuse last ContinueBlock.
JumpDest OuterContinue;
if (!BreakContinueStack.empty())
@@ -1320,8 +1413,20 @@
// Emit continuation.
EmitBlock(SwitchExit.getBlock(), true);
+ RegionCounter ExitCnt = getPGORegionCounter(&S);
+ ExitCnt.beginRegion(Builder);
+ if (SwitchWeights) {
+ assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() &&
+ "switch weights do not match switch cases");
+ // If there's only one jump destination there's no sense weighting it.
+ if (SwitchWeights->size() > 1)
+ SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
+ PGO.createBranchWeights(*SwitchWeights));
+ delete SwitchWeights;
+ }
SwitchInsn = SavedSwitchInsn;
+ SwitchWeights = SavedSwitchWeights;
CaseRangeBlock = SavedCRBlock;
}
@@ -1493,7 +1598,7 @@
Name = GAS->getOutputName(i);
TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name);
bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid;
- assert(IsValid && "Failed to parse output constraint");
+ assert(IsValid && "Failed to parse output constraint");
OutputConstraintInfos.push_back(Info);
}
@@ -1829,8 +1934,8 @@
// Create the function declaration.
FunctionType::ExtInfo ExtInfo;
const CGFunctionInfo &FuncInfo =
- CGM.getTypes().arrangeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
- /*IsVariadic=*/false);
+ CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
+ /*IsVariadic=*/false);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
llvm::Function *F =
diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp
index bfff470..bd280ea 100644
--- a/lib/CodeGen/CGVTT.cpp
+++ b/lib/CodeGen/CGVTT.cpp
@@ -66,7 +66,8 @@
if (VTTVT.getBase() == RD) {
// Just get the address point for the regular vtable.
AddressPoint =
- ItaniumVTContext.getVTableLayout(RD).getAddressPoint(i->VTableBase);
+ getItaniumVTableContext().getVTableLayout(RD).getAddressPoint(
+ i->VTableBase);
assert(AddressPoint != 0 && "Did not find vtable address point!");
} else {
AddressPoint = VTableAddressPoints[i->VTableIndex].lookup(i->VTableBase);
@@ -94,7 +95,7 @@
VTT->setLinkage(Linkage);
// Set the right visibility.
- CGM.setTypeVisibility(VTT, RD, CodeGenModule::TVK_ForVTT);
+ CGM.setGlobalVisibility(VTT, RD);
}
llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) {
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index f28d9b6..0ad765c 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -30,14 +30,7 @@
using namespace CodeGen;
CodeGenVTables::CodeGenVTables(CodeGenModule &CGM)
- : CGM(CGM), ItaniumVTContext(CGM.getContext()) {
- if (CGM.getTarget().getCXXABI().isMicrosoft()) {
- // FIXME: Eventually, we should only have one of V*TContexts available.
- // Today we use both in the Microsoft ABI as MicrosoftVFTableContext
- // is not completely supported in CodeGen yet.
- MicrosoftVTContext.reset(new MicrosoftVTableContext(CGM.getContext()));
- }
-}
+ : CGM(CGM), VTContext(CGM.getContext().getVTableContext()) {}
llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD,
const ThunkInfo &Thunk) {
@@ -54,54 +47,13 @@
Out.flush();
llvm::Type *Ty = getTypes().GetFunctionTypeForVTable(GD);
- return GetOrCreateLLVMFunction(Name, Ty, GD, /*ForVTable=*/true);
+ return GetOrCreateLLVMFunction(Name, Ty, GD, /*ForVTable=*/true,
+ /*DontDefer*/ true);
}
static void setThunkVisibility(CodeGenModule &CGM, const CXXMethodDecl *MD,
const ThunkInfo &Thunk, llvm::Function *Fn) {
CGM.setGlobalVisibility(Fn, MD);
-
- if (!CGM.getCodeGenOpts().HiddenWeakVTables)
- return;
-
- // If the thunk has weak/linkonce linkage, but the function must be
- // emitted in every translation unit that references it, then we can
- // emit its thunks with hidden visibility, since its thunks must be
- // emitted when the function is.
-
- // This follows CodeGenModule::setTypeVisibility; see the comments
- // there for explanation.
-
- if ((Fn->getLinkage() != llvm::GlobalVariable::LinkOnceODRLinkage &&
- Fn->getLinkage() != llvm::GlobalVariable::WeakODRLinkage) ||
- Fn->getVisibility() != llvm::GlobalVariable::DefaultVisibility)
- return;
-
- if (MD->getExplicitVisibility(ValueDecl::VisibilityForValue))
- return;
-
- switch (MD->getTemplateSpecializationKind()) {
- case TSK_ExplicitInstantiationDefinition:
- case TSK_ExplicitInstantiationDeclaration:
- return;
-
- case TSK_Undeclared:
- break;
-
- case TSK_ExplicitSpecialization:
- case TSK_ImplicitInstantiation:
- return;
- break;
- }
-
- // If there's an explicit definition, and that definition is
- // out-of-line, then we can't assume that all users will have a
- // definition to emit.
- const FunctionDecl *Def = 0;
- if (MD->hasBody(Def) && Def->isOutOfLine())
- return;
-
- Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
}
#ifndef NDEBUG
@@ -177,7 +129,7 @@
GlobalDecl GD, const ThunkInfo &Thunk) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
- QualType ResultType = FPT->getResultType();
+ QualType ResultType = FPT->getReturnType();
// Get the original function
assert(FnInfo.isVariadic());
@@ -248,11 +200,11 @@
QualType ThisType = MD->getThisType(getContext());
const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
QualType ResultType =
- CGM.getCXXABI().HasThisReturn(GD) ? ThisType : FPT->getResultType();
+ CGM.getCXXABI().HasThisReturn(GD) ? ThisType : FPT->getReturnType();
FunctionArgList FunctionArgs;
// Create the implicit 'this' parameter declaration.
- CGM.getCXXABI().BuildInstanceFunctionParams(*this, ResultType, FunctionArgs);
+ CGM.getCXXABI().buildThisParam(*this, FunctionArgs);
// Add the rest of the parameters.
for (FunctionDecl::param_const_iterator I = MD->param_begin(),
@@ -260,6 +212,9 @@
I != E; ++I)
FunctionArgs.push_back(*I);
+ if (isa<CXXDestructorDecl>(MD))
+ CGM.getCXXABI().addImplicitStructorParams(*this, ResultType, FunctionArgs);
+
// Start defining the function.
StartFunction(GlobalDecl(), ResultType, Fn, FnInfo, FunctionArgs,
SourceLocation());
@@ -316,7 +271,7 @@
// Determine whether we have a return value slot to use.
QualType ResultType =
- CGM.getCXXABI().HasThisReturn(GD) ? ThisType : FPT->getResultType();
+ CGM.getCXXABI().HasThisReturn(GD) ? ThisType : FPT->getReturnType();
ReturnValueSlot Slot;
if (!ResultType->isVoidType() &&
CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect &&
@@ -461,12 +416,8 @@
if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base)
return;
- const VTableContextBase::ThunkInfoVectorTy *ThunkInfoVector;
- if (MicrosoftVTContext.isValid()) {
- ThunkInfoVector = MicrosoftVTContext->getThunkInfo(GD);
- } else {
- ThunkInfoVector = ItaniumVTContext.getThunkInfo(GD);
- }
+ const VTableContextBase::ThunkInfoVectorTy *ThunkInfoVector =
+ VTContext->getThunkInfo(GD);
if (!ThunkInfoVector)
return;
@@ -603,8 +554,8 @@
if (CGDebugInfo *DI = CGM.getModuleDebugInfo())
DI->completeClassData(Base.getBase());
- OwningPtr<VTableLayout> VTLayout(
- ItaniumVTContext.createConstructionVTableLayout(
+ std::unique_ptr<VTableLayout> VTLayout(
+ getItaniumVTableContext().createConstructionVTableLayout(
Base.getBase(), Base.getBaseOffset(), BaseIsVirtual, RD));
// Add the address points.
@@ -633,7 +584,7 @@
// Create the variable that will hold the construction vtable.
llvm::GlobalVariable *VTable =
CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType, Linkage);
- CGM.setTypeVisibility(VTable, RD, CodeGenModule::TVK_ForConstructionVTable);
+ CGM.setGlobalVisibility(VTable, RD);
// V-tables are always unnamed_addr.
VTable->setUnnamedAddr(true);
@@ -752,7 +703,7 @@
/// strongly elsewhere. Otherwise, we'd just like to avoid emitting
/// v-tables when unnecessary.
bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) {
- assert(RD->isDynamicClass() && "Non dynamic classes have no VTable.");
+ assert(RD->isDynamicClass() && "Non-dynamic classes have no VTable.");
// If we have an explicit instantiation declaration (and not a
// definition), the v-table is defined elsewhere.
diff --git a/lib/CodeGen/CGVTables.h b/lib/CodeGen/CGVTables.h
index e8cd55e..e1554be 100644
--- a/lib/CodeGen/CGVTables.h
+++ b/lib/CodeGen/CGVTables.h
@@ -31,11 +31,8 @@
class CodeGenVTables {
CodeGenModule &CGM;
- // FIXME: Consider moving ItaniumVTContext and MicrosoftVTContext into
- // respective CXXABI classes?
- ItaniumVTableContext ItaniumVTContext;
- OwningPtr<MicrosoftVTableContext> MicrosoftVTContext;
-
+ VTableContextBase *VTContext;
+
/// VTableAddressPointsMapTy - Address points for a single vtable.
typedef llvm::DenseMap<BaseSubobject, uint64_t> VTableAddressPointsMapTy;
@@ -72,10 +69,12 @@
CodeGenVTables(CodeGenModule &CGM);
- ItaniumVTableContext &getItaniumVTableContext() { return ItaniumVTContext; }
+ ItaniumVTableContext &getItaniumVTableContext() {
+ return *cast<ItaniumVTableContext>(VTContext);
+ }
MicrosoftVTableContext &getMicrosoftVTableContext() {
- return *MicrosoftVTContext.get();
+ return *cast<MicrosoftVTableContext>(VTContext);
}
/// getSubVTTIndex - Return the index of the sub-VTT for the base class of the
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 83dbbf0..12cb1ab 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,12 +1,18 @@
set(LLVM_LINK_COMPONENTS
- asmparser
- bitreader
- bitwriter
- irreader
- instrumentation
- ipo
- linker
- vectorize
+ BitReader
+ BitWriter
+ Core
+ IPO
+ IRReader
+ InstCombine
+ Instrumentation
+ Linker
+ MC
+ ObjCARCOpts
+ ScalarOpts
+ Support
+ Target
+ TransformUtils
)
add_clang_library(clangCodeGen
@@ -14,12 +20,12 @@
CGAtomic.cpp
CGBlocks.cpp
CGBuiltin.cpp
- CGCall.cpp
- CGClass.cpp
CGCUDANV.cpp
CGCUDARuntime.cpp
CGCXX.cpp
CGCXXABI.cpp
+ CGCall.cpp
+ CGClass.cpp
CGCleanup.cpp
CGDebugInfo.cpp
CGDecl.cpp
@@ -27,45 +33,36 @@
CGException.cpp
CGExpr.cpp
CGExprAgg.cpp
+ CGExprCXX.cpp
CGExprComplex.cpp
CGExprConstant.cpp
- CGExprCXX.cpp
CGExprScalar.cpp
CGObjC.cpp
CGObjCGNU.cpp
CGObjCMac.cpp
CGObjCRuntime.cpp
CGOpenCLRuntime.cpp
- CGRecordLayoutBuilder.cpp
CGRTTI.cpp
+ CGRecordLayoutBuilder.cpp
CGStmt.cpp
- CGVTables.cpp
CGVTT.cpp
+ CGVTables.cpp
CodeGenABITypes.cpp
CodeGenAction.cpp
CodeGenFunction.cpp
CodeGenModule.cpp
+ CodeGenPGO.cpp
CodeGenTBAA.cpp
CodeGenTypes.cpp
ItaniumCXXABI.cpp
MicrosoftCXXABI.cpp
- MicrosoftVBTables.cpp
ModuleBuilder.cpp
TargetInfo.cpp
- )
-add_dependencies(clangCodeGen
- ClangARMNeon
- ClangAttrClasses
- ClangAttrList
- ClangCommentNodes
- ClangDeclNodes
- ClangDiagnosticCommon
- ClangDiagnosticFrontend
- ClangStmtNodes
- )
+ DEPENDS
+ intrinsics_gen
-target_link_libraries(clangCodeGen
+ LINK_LIBS
clangBasic
clangAST
clangFrontend
diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp
index 18c836c..fba7184 100644
--- a/lib/CodeGen/CodeGenABITypes.cpp
+++ b/lib/CodeGen/CodeGenABITypes.cpp
@@ -17,23 +17,23 @@
//===----------------------------------------------------------------------===//
#include "clang/CodeGen/CodeGenABITypes.h"
-
-#include "clang/CodeGen/CGFunctionInfo.h"
#include "CodeGenModule.h"
+#include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/Frontend/CodeGenOptions.h"
using namespace clang;
using namespace CodeGen;
CodeGenABITypes::CodeGenABITypes(ASTContext &C,
- const CodeGenOptions &CodeGenOpts,
llvm::Module &M,
- const llvm::DataLayout &TD,
- DiagnosticsEngine &Diags)
- : CGM(new CodeGen::CodeGenModule(C, CodeGenOpts, M, TD, Diags)) {
+ const llvm::DataLayout &TD)
+ : CGO(new CodeGenOptions),
+ CGM(new CodeGen::CodeGenModule(C, *CGO, M, TD, C.getDiagnostics())) {
}
CodeGenABITypes::~CodeGenABITypes()
{
+ delete CGO;
delete CGM;
}
@@ -60,10 +60,10 @@
}
const CGFunctionInfo &
-CodeGenABITypes::arrangeLLVMFunctionInfo(CanQualType returnType,
+CodeGenABITypes::arrangeFreeFunctionCall(CanQualType returnType,
llvm::ArrayRef<CanQualType> argTypes,
FunctionType::ExtInfo info,
RequiredArgs args) {
- return CGM->getTypes().arrangeLLVMFunctionInfo(returnType, argTypes,
- info, args);
+ return CGM->getTypes().arrangeLLVMFunctionInfo(
+ returnType, /*IsInstanceMethod=*/false, argTypes, info, args);
}
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 3072204..573f8e9 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -18,17 +18,19 @@
#include "clang/CodeGen/ModuleBuilder.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendDiagnostic.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker.h"
+#include "llvm/Linker/Linker.h"
#include "llvm/Pass.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/Timer.h"
+#include <memory>
using namespace clang;
using namespace llvm;
@@ -45,42 +47,33 @@
Timer LLVMIRGeneration;
- OwningPtr<CodeGenerator> Gen;
+ std::unique_ptr<CodeGenerator> Gen;
- OwningPtr<llvm::Module> TheModule, LinkModule;
+ std::unique_ptr<llvm::Module> TheModule, LinkModule;
public:
BackendConsumer(BackendAction action, DiagnosticsEngine &_Diags,
const CodeGenOptions &compopts,
const TargetOptions &targetopts,
- const LangOptions &langopts,
- bool TimePasses,
- const std::string &infile,
- llvm::Module *LinkModule,
- raw_ostream *OS,
- LLVMContext &C) :
- Diags(_Diags),
- Action(action),
- CodeGenOpts(compopts),
- TargetOpts(targetopts),
- LangOpts(langopts),
- AsmOutStream(OS),
- Context(),
- LLVMIRGeneration("LLVM IR Generation Time"),
- Gen(CreateLLVMCodeGen(Diags, infile, compopts, targetopts, C)),
- LinkModule(LinkModule)
- {
+ const LangOptions &langopts, bool TimePasses,
+ const std::string &infile, llvm::Module *LinkModule,
+ raw_ostream *OS, LLVMContext &C)
+ : Diags(_Diags), Action(action), CodeGenOpts(compopts),
+ TargetOpts(targetopts), LangOpts(langopts), AsmOutStream(OS),
+ Context(), LLVMIRGeneration("LLVM IR Generation Time"),
+ Gen(CreateLLVMCodeGen(Diags, infile, compopts, targetopts, C)),
+ LinkModule(LinkModule) {
llvm::TimePassesIsEnabled = TimePasses;
}
- llvm::Module *takeModule() { return TheModule.take(); }
- llvm::Module *takeLinkModule() { return LinkModule.take(); }
+ llvm::Module *takeModule() { return TheModule.release(); }
+ llvm::Module *takeLinkModule() { return LinkModule.release(); }
- virtual void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {
+ void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override {
Gen->HandleCXXStaticMemberVarInstantiation(VD);
}
- virtual void Initialize(ASTContext &Ctx) {
+ void Initialize(ASTContext &Ctx) override {
Context = &Ctx;
if (llvm::TimePassesIsEnabled)
@@ -94,7 +87,7 @@
LLVMIRGeneration.stopTimer();
}
- virtual bool HandleTopLevelDecl(DeclGroupRef D) {
+ bool HandleTopLevelDecl(DeclGroupRef D) override {
PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(),
Context->getSourceManager(),
"LLVM IR generation of declaration");
@@ -110,7 +103,7 @@
return true;
}
- virtual void HandleTranslationUnit(ASTContext &C) {
+ void HandleTranslationUnit(ASTContext &C) override {
{
PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
if (llvm::TimePassesIsEnabled)
@@ -132,7 +125,7 @@
if (!M) {
// The module has been released by IR gen on failures, do not double
// free.
- TheModule.take();
+ TheModule.release();
return;
}
@@ -158,41 +151,49 @@
void *OldContext = Ctx.getInlineAsmDiagnosticContext();
Ctx.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, this);
+ LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
+ Ctx.getDiagnosticHandler();
+ void *OldDiagnosticContext = Ctx.getDiagnosticContext();
+ Ctx.setDiagnosticHandler(DiagnosticHandler, this);
+
EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
+ C.getTargetInfo().getTargetDescription(),
TheModule.get(), Action, AsmOutStream);
-
+
Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext);
+
+ Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext);
}
- virtual void HandleTagDeclDefinition(TagDecl *D) {
+ void HandleTagDeclDefinition(TagDecl *D) override {
PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
Context->getSourceManager(),
"LLVM IR generation of declaration");
Gen->HandleTagDeclDefinition(D);
}
- virtual void HandleTagDeclRequiredDefinition(const TagDecl *D) {
+ void HandleTagDeclRequiredDefinition(const TagDecl *D) override {
Gen->HandleTagDeclRequiredDefinition(D);
}
- virtual void CompleteTentativeDefinition(VarDecl *D) {
+ void CompleteTentativeDefinition(VarDecl *D) override {
Gen->CompleteTentativeDefinition(D);
}
- virtual void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) {
+ void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) override {
Gen->HandleVTable(RD, DefinitionRequired);
}
- virtual void HandleLinkerOptionPragma(llvm::StringRef Opts) {
+ void HandleLinkerOptionPragma(llvm::StringRef Opts) override {
Gen->HandleLinkerOptionPragma(Opts);
}
- virtual void HandleDetectMismatch(llvm::StringRef Name,
- llvm::StringRef Value) {
+ void HandleDetectMismatch(llvm::StringRef Name,
+ llvm::StringRef Value) override {
Gen->HandleDetectMismatch(Name, Value);
}
- virtual void HandleDependentLibrary(llvm::StringRef Opts) {
+ void HandleDependentLibrary(llvm::StringRef Opts) override {
Gen->HandleDependentLibrary(Opts);
}
@@ -202,8 +203,23 @@
((BackendConsumer*)Context)->InlineAsmDiagHandler2(SM, Loc);
}
+ static void DiagnosticHandler(const llvm::DiagnosticInfo &DI,
+ void *Context) {
+ ((BackendConsumer *)Context)->DiagnosticHandlerImpl(DI);
+ }
+
void InlineAsmDiagHandler2(const llvm::SMDiagnostic &,
SourceLocation LocCookie);
+
+ void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI);
+ /// \brief Specialized handler for InlineAsm diagnostic.
+ /// \return True if the diagnostic has been successfully reported, false
+ /// otherwise.
+ bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D);
+ /// \brief Specialized handler for StackSize diagnostic.
+ /// \return True if the diagnostic has been successfully reported, false
+ /// otherwise.
+ bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D);
};
void BackendConsumer::anchor() {}
@@ -282,7 +298,116 @@
Diags.Report(Loc, diag::err_fe_inline_asm).AddString(Message);
}
-//
+#define ComputeDiagID(Severity, GroupName, DiagID) \
+ do { \
+ switch (Severity) { \
+ case llvm::DS_Error: \
+ DiagID = diag::err_fe_##GroupName; \
+ break; \
+ case llvm::DS_Warning: \
+ DiagID = diag::warn_fe_##GroupName; \
+ break; \
+ case llvm::DS_Remark: \
+ llvm_unreachable("'remark' severity not expected"); \
+ break; \
+ case llvm::DS_Note: \
+ DiagID = diag::note_fe_##GroupName; \
+ break; \
+ } \
+ } while (false)
+
+#define ComputeDiagRemarkID(Severity, GroupName, DiagID) \
+ do { \
+ switch (Severity) { \
+ case llvm::DS_Error: \
+ DiagID = diag::err_fe_##GroupName; \
+ break; \
+ case llvm::DS_Warning: \
+ DiagID = diag::warn_fe_##GroupName; \
+ break; \
+ case llvm::DS_Remark: \
+ DiagID = diag::remark_fe_##GroupName; \
+ break; \
+ case llvm::DS_Note: \
+ DiagID = diag::note_fe_##GroupName; \
+ break; \
+ } \
+ } while (false)
+
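+// For readers unfamiliar with the token pasting above: the two macros differ
+// only in whether DS_Remark is reachable, and each expands to a switch that
+// prefixes the group name with err_fe_/warn_fe_/remark_fe_/note_fe_. A
+// hypothetical standalone sketch of that mapping, using strings in place of
+// the real diagnostic IDs:
+//
+//   #include <string>
+//
+//   // Severity values mirror llvm::DiagnosticSeverity; Group stands in for
+//   // the token pasted by the macros (e.g. "inline_asm", "backend_plugin").
+//   enum Severity { DS_Error, DS_Warning, DS_Remark, DS_Note };
+//
+//   static std::string diagNameFor(Severity S, const std::string &Group,
+//                                  bool RemarkAllowed) {
+//     switch (S) {
+//     case DS_Error:   return "err_fe_" + Group;
+//     case DS_Warning: return "warn_fe_" + Group;
+//     case DS_Remark:
+//       // ComputeDiagID treats remarks as unreachable; only
+//       // ComputeDiagRemarkID maps them, to a remark_fe_ ID.
+//       return RemarkAllowed ? "remark_fe_" + Group : "<unexpected remark>";
+//     case DS_Note:    return "note_fe_" + Group;
+//     }
+//     return std::string();
+//   }
+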
+bool
+BackendConsumer::InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D) {
+ unsigned DiagID;
+ ComputeDiagID(D.getSeverity(), inline_asm, DiagID);
+ std::string Message = D.getMsgStr().str();
+
+ // If this problem has clang-level source location information, report the
+ // issue as being a problem in the source with a note showing the instantiated
+ // code.
+ SourceLocation LocCookie =
+ SourceLocation::getFromRawEncoding(D.getLocCookie());
+ if (LocCookie.isValid())
+ Diags.Report(LocCookie, DiagID).AddString(Message);
+ else {
+ // Otherwise, report the backend diagnostic as occurring in the generated
+ // .s file.
+    // If Loc is invalid, we still need to report the diagnostic; it just
+    // gets no location info.
+ FullSourceLoc Loc;
+ Diags.Report(Loc, DiagID).AddString(Message);
+ }
+ // We handled all the possible severities.
+ return true;
+}
+
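For reference, a minimal source sketch (function name invented) that ends up in InlineAsmDiagHandler(): the integrated assembler only rejects the bogus mnemonic once the backend runs, and the loc cookie attached to the DiagnosticInfoInlineAsm maps the report back to the offending line.

    // sketch.cpp -- the mnemonic is deliberately invalid; the error surfaces at
    // backend time and is routed through the handlers added in this change.
    void bad_asm() {
      __asm__("bogus_mnemonic");
    }
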
+bool
+BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) {
+ if (D.getSeverity() != llvm::DS_Warning)
+    // For now, the only severity we support for the StackSize diagnostic is
+    // a warning. We do not know how to format other severities.
+ return false;
+
+ // FIXME: We should demangle the function name.
+ // FIXME: Is there a way to get a location for that function?
+ FullSourceLoc Loc;
+ Diags.Report(Loc, diag::warn_fe_backend_frame_larger_than)
+ << D.getStackSize() << D.getFunction().getName();
+ return true;
+}
+
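A sketch of code that trips the StackSize path (function name and threshold invented): built with a low -Wframe-larger-than= value, the backend emits a DiagnosticInfoStackSize and the handler above reports it via warn_fe_backend_frame_larger_than.

    // e.g. clang++ -c -Wframe-larger-than=128 frame.cpp
    int big_frame() {
      volatile char buf[4096]; // forces a frame well over the threshold
      buf[0] = 1;
      return buf[0];
    }
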
+/// \brief This function is invoked when the backend needs
+/// to report something to the user.
+void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
+ unsigned DiagID = diag::err_fe_inline_asm;
+ llvm::DiagnosticSeverity Severity = DI.getSeverity();
+  // Get the diagnostic ID based on the kind of diagnostic being reported.
+ switch (DI.getKind()) {
+ case llvm::DK_InlineAsm:
+ if (InlineAsmDiagHandler(cast<DiagnosticInfoInlineAsm>(DI)))
+ return;
+ ComputeDiagID(Severity, inline_asm, DiagID);
+ break;
+ case llvm::DK_StackSize:
+ if (StackSizeDiagHandler(cast<DiagnosticInfoStackSize>(DI)))
+ return;
+ ComputeDiagID(Severity, backend_frame_larger_than, DiagID);
+ break;
+ default:
+ // Plugin IDs are not bound to any value as they are set dynamically.
+ ComputeDiagRemarkID(Severity, backend_plugin, DiagID);
+ break;
+ }
+ std::string MsgStorage;
+ {
+ raw_string_ostream Stream(MsgStorage);
+ DiagnosticPrinterRawOStream DP(Stream);
+ DI.print(DP);
+ }
+
+ // Report the backend message using the usual diagnostic mechanism.
+ FullSourceLoc Loc;
+ Diags.Report(Loc, DiagID).AddString(MsgStorage);
+}
+#undef ComputeDiagID
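Only the restore half of the handler wiring is visible at the top of this change; a minimal sketch of the intended installation around the backend run (the earlier save of OldDiagnosticHandler/OldDiagnosticContext is assumed and not shown here):

    // Inside BackendConsumer::HandleTranslationUnit (sketch, not the full body):
    Ctx.setDiagnosticHandler(DiagnosticHandler, /*Context=*/this);
    EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
                      C.getTargetInfo().getTargetDescription(),
                      TheModule.get(), Action, AsmOutStream);
    // Put back whatever handler was registered before the backend ran.
    Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext);
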
CodeGenAction::CodeGenAction(unsigned _Act, LLVMContext *_VMContext)
: Act(_Act), LinkModule(0),
@@ -310,9 +435,7 @@
TheModule.reset(BEConsumer->takeModule());
}
-llvm::Module *CodeGenAction::takeModule() {
- return TheModule.take();
-}
+llvm::Module *CodeGenAction::takeModule() { return TheModule.release(); }
llvm::LLVMContext *CodeGenAction::takeLLVMContext() {
OwnsVMContext = false;
@@ -342,7 +465,7 @@
ASTConsumer *CodeGenAction::CreateASTConsumer(CompilerInstance &CI,
StringRef InFile) {
BackendAction BA = static_cast<BackendAction>(Act);
- OwningPtr<raw_ostream> OS(GetOutputStream(CI, InFile, BA));
+ std::unique_ptr<raw_ostream> OS(GetOutputStream(CI, InFile, BA));
if (BA != Backend_EmitNothing && !OS)
return 0;
@@ -362,20 +485,20 @@
return 0;
}
- LinkModuleToUse = getLazyBitcodeModule(BCBuf, *VMContext, &ErrorStr);
- if (!LinkModuleToUse) {
+ ErrorOr<llvm::Module *> ModuleOrErr =
+ getLazyBitcodeModule(BCBuf, *VMContext);
+ if (error_code EC = ModuleOrErr.getError()) {
CI.getDiagnostics().Report(diag::err_cannot_open_file)
- << LinkBCFile << ErrorStr;
+ << LinkBCFile << EC.message();
return 0;
}
+ LinkModuleToUse = ModuleOrErr.get();
}
- BEConsumer =
- new BackendConsumer(BA, CI.getDiagnostics(),
- CI.getCodeGenOpts(), CI.getTargetOpts(),
- CI.getLangOpts(),
- CI.getFrontendOpts().ShowTimers, InFile,
- LinkModuleToUse, OS.take(), *VMContext);
+ BEConsumer = new BackendConsumer(BA, CI.getDiagnostics(), CI.getCodeGenOpts(),
+ CI.getTargetOpts(), CI.getLangOpts(),
+ CI.getFrontendOpts().ShowTimers, InFile,
+ LinkModuleToUse, OS.release(), *VMContext);
return BEConsumer;
}
@@ -408,31 +531,30 @@
SM.getFileEntryForID(SM.getMainFileID()), Err.getLineNo(),
Err.getColumnNo() + 1);
- // Get a custom diagnostic for the error. We strip off a leading
- // diagnostic code if there is one.
+ // Strip off a leading diagnostic code if there is one.
StringRef Msg = Err.getMessage();
if (Msg.startswith("error: "))
Msg = Msg.substr(7);
- // Escape '%', which is interpreted as a format character.
- SmallString<128> EscapedMessage;
- for (unsigned i = 0, e = Msg.size(); i != e; ++i) {
- if (Msg[i] == '%')
- EscapedMessage += '%';
- EscapedMessage += Msg[i];
- }
+ unsigned DiagID =
+ CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0");
- unsigned DiagID = CI.getDiagnostics().getCustomDiagID(
- DiagnosticsEngine::Error, EscapedMessage);
-
- CI.getDiagnostics().Report(Loc, DiagID);
+ CI.getDiagnostics().Report(Loc, DiagID) << Msg;
return;
}
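The fixed "%0" format makes the manual '%' escaping removed above unnecessary: a custom diagnostic's text is itself a format string, so a parser message containing a literal '%' (common in IR and asm) would otherwise be misread as a directive. A small sketch of the pattern (the message text is invented):

    unsigned DiagID =
        CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0");
    // '%' in the message stays literal because it is passed as an argument.
    CI.getDiagnostics().Report(Loc, DiagID) << StringRef("unknown register '%foo'");
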
+ const TargetOptions &TargetOpts = CI.getTargetOpts();
+ if (TheModule->getTargetTriple() != TargetOpts.Triple) {
+ unsigned DiagID = CI.getDiagnostics().getCustomDiagID(
+ DiagnosticsEngine::Warning,
+ "overriding the module target triple with %0");
- EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(),
- CI.getTargetOpts(), CI.getLangOpts(),
- TheModule.get(),
- BA, OS);
+ CI.getDiagnostics().Report(SourceLocation(), DiagID) << TargetOpts.Triple;
+ TheModule->setTargetTriple(TargetOpts.Triple);
+ }
+
+ EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts,
+ CI.getLangOpts(), CI.getTarget().getTargetDescription(),
+ TheModule.get(), BA, OS);
return;
}
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index ce1b445..806540a 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -16,12 +16,12 @@
#include "CGCXXABI.h"
#include "CGDebugInfo.h"
#include "CodeGenModule.h"
+#include "CodeGenPGO.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/StmtCXX.h"
-#include "clang/Basic/OpenCL.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/CodeGenOptions.h"
@@ -44,7 +44,8 @@
NextCleanupDestIndex(1), FirstBlockInfo(0), EHResumeBlock(0),
ExceptionSlot(0), EHSelectorSlot(0), DebugInfo(CGM.getModuleDebugInfo()),
DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(0),
- SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
+ PGO(cgm), SwitchInsn(0), SwitchWeights(0),
+ CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
NumSimpleReturnExprs(0), CXXABIThisDecl(0), CXXABIThisValue(0),
CXXThisValue(0), CXXDefaultInitExprThis(0),
CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0),
@@ -157,7 +158,7 @@
// cleans up functions which started with a unified return block.
if (ReturnBlock.getBlock()->hasOneUse()) {
llvm::BranchInst *BI =
- dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->use_begin());
+ dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->user_begin());
if (BI && BI->isUnconditional() &&
BI->getSuccessor(0) == ReturnBlock.getBlock()) {
// Reset insertion point, including debug location, and delete the
@@ -275,6 +276,14 @@
if (CGM.getCodeGenOpts().EmitDeclMetadata)
EmitDeclMetadata();
+
+ for (SmallVectorImpl<std::pair<llvm::Instruction *, llvm::Value *> >::iterator
+ I = DeferredReplacements.begin(),
+ E = DeferredReplacements.end();
+ I != E; ++I) {
+ I->first->replaceAllUsesWith(I->second);
+ I->first->eraseFromParent();
+ }
}
/// ShouldInstrumentFunction - Return true if the current function should be
@@ -381,7 +390,12 @@
if (pointeeTy.isVolatileQualified())
typeQuals += typeQuals.empty() ? "volatile" : " volatile";
} else {
- addressQuals.push_back(Builder.getInt32(0));
+ uint32_t AddrSpc = 0;
+ if (ty->isImageType())
+ AddrSpc =
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
+
+ addressQuals.push_back(Builder.getInt32(AddrSpc));
// Get argument type name.
std::string typeName = ty.getUnqualifiedType().getAsString();
@@ -399,16 +413,17 @@
if (ty.isVolatileQualified())
typeQuals += typeQuals.empty() ? "volatile" : " volatile";
}
-
+
argTypeQuals.push_back(llvm::MDString::get(Context, typeQuals));
// Get image access qualifier:
if (ty->isImageType()) {
- if (parm->hasAttr<OpenCLImageAccessAttr>() &&
- parm->getAttr<OpenCLImageAccessAttr>()->getAccess() == CLIA_write_only)
+ const OpenCLImageAccessAttr *A = parm->getAttr<OpenCLImageAccessAttr>();
+ if (A && A->isWriteOnly())
accessQuals.push_back(llvm::MDString::get(Context, "write_only"));
else
accessQuals.push_back(llvm::MDString::get(Context, "read_only"));
+ // FIXME: what about read_write?
} else
accessQuals.push_back(llvm::MDString::get(Context, "none"));
@@ -438,16 +453,15 @@
GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs,
Builder, getContext());
- if (FD->hasAttr<VecTypeHintAttr>()) {
- VecTypeHintAttr *attr = FD->getAttr<VecTypeHintAttr>();
- QualType hintQTy = attr->getTypeHint();
+ if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
+ QualType hintQTy = A->getTypeHint();
const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>();
bool isSignedInteger =
hintQTy->isSignedIntegerType() ||
(hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType());
llvm::Value *attrMDArgs[] = {
llvm::MDString::get(Context, "vec_type_hint"),
- llvm::UndefValue::get(CGM.getTypes().ConvertType(attr->getTypeHint())),
+ llvm::UndefValue::get(CGM.getTypes().ConvertType(A->getTypeHint())),
llvm::ConstantInt::get(
llvm::IntegerType::get(Context, 32),
llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0)))
@@ -455,24 +469,22 @@
kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs));
}
- if (FD->hasAttr<WorkGroupSizeHintAttr>()) {
- WorkGroupSizeHintAttr *attr = FD->getAttr<WorkGroupSizeHintAttr>();
+ if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) {
llvm::Value *attrMDArgs[] = {
llvm::MDString::get(Context, "work_group_size_hint"),
- Builder.getInt32(attr->getXDim()),
- Builder.getInt32(attr->getYDim()),
- Builder.getInt32(attr->getZDim())
+ Builder.getInt32(A->getXDim()),
+ Builder.getInt32(A->getYDim()),
+ Builder.getInt32(A->getZDim())
};
kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs));
}
- if (FD->hasAttr<ReqdWorkGroupSizeAttr>()) {
- ReqdWorkGroupSizeAttr *attr = FD->getAttr<ReqdWorkGroupSizeAttr>();
+ if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
llvm::Value *attrMDArgs[] = {
llvm::MDString::get(Context, "reqd_work_group_size"),
- Builder.getInt32(attr->getXDim()),
- Builder.getInt32(attr->getYDim()),
- Builder.getInt32(attr->getZDim())
+ Builder.getInt32(A->getXDim()),
+ Builder.getInt32(A->getYDim()),
+ Builder.getInt32(A->getZDim())
};
kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs));
}
@@ -505,15 +517,18 @@
}
// Pass inline keyword to optimizer if it appears explicitly on any
- // declaration.
- if (!CGM.getCodeGenOpts().NoInline)
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
- for (FunctionDecl::redecl_iterator RI = FD->redecls_begin(),
- RE = FD->redecls_end(); RI != RE; ++RI)
+  // declaration. Also, in the case of -fno-inline, attach the NoInline
+  // attribute to all functions that are not marked AlwaysInline.
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ if (!CGM.getCodeGenOpts().NoInline) {
+ for (auto RI : FD->redecls())
if (RI->isInlineSpecified()) {
Fn->addFnAttr(llvm::Attribute::InlineHint);
break;
}
+ } else if (!FD->hasAttr<AlwaysInlineAttr>())
+ Fn->addFnAttr(llvm::Attribute::NoInline);
+ }
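A small made-up translation unit illustrating the new -fno-inline behaviour: everything without always_inline gets the IR noinline attribute, while the always_inline function is left untouched so it can still be force-inlined.

    // clang++ -O2 -fno-inline -S example.cpp (sketch)
    inline int add(int a, int b) { return a + b; }          // gets 'noinline'
    __attribute__((always_inline)) inline int sq(int x) {   // no 'noinline'
      return x * x;
    }
    int use(int x) { return add(x, 1) + sq(x); }            // gets 'noinline'
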
if (getLangOpts().OpenCL) {
// Add metadata for a kernel function.
@@ -581,6 +596,14 @@
// Indirect aggregate return; emit returned value directly into sret slot.
// This reduces code size, and affects correctness in C++.
ReturnValue = CurFn->arg_begin();
+ } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::InAlloca &&
+ !hasScalarEvaluationKind(CurFnInfo->getReturnType())) {
+ // Load the sret pointer from the argument struct and return into that.
+ unsigned Idx = CurFnInfo->getReturnInfo().getInAllocaFieldIndex();
+ llvm::Function::arg_iterator EI = CurFn->arg_end();
+ --EI;
+ llvm::Value *Addr = Builder.CreateStructGEP(EI, Idx);
+ ReturnValue = Builder.CreateLoad(Addr, "agg.result");
} else {
ReturnValue = CreateIRTemp(RetTy, "retval");
@@ -645,12 +668,34 @@
void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
const Stmt *Body) {
+ RegionCounter Cnt = getPGORegionCounter(Body);
+ Cnt.beginRegion(Builder);
if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
EmitCompoundStmtWithoutScope(*S);
else
EmitStmt(Body);
}
+/// When instrumenting to collect profile data, the counts for some blocks
+/// such as switch cases must not include the fall-through counts, so
+/// emit a branch around the instrumentation code. When not instrumenting,
+/// this just calls EmitBlock().
+void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB,
+ RegionCounter &Cnt) {
+ llvm::BasicBlock *SkipCountBB = 0;
+ if (HaveInsertPoint() && CGM.getCodeGenOpts().ProfileInstrGenerate) {
+ // When instrumenting for profiling, the fallthrough to certain
+ // statements needs to skip over the instrumentation code so that we
+ // get an accurate count.
+ SkipCountBB = createBasicBlock("skipcount");
+ EmitBranch(SkipCountBB);
+ }
+ EmitBlock(BB);
+ Cnt.beginRegion(Builder, /*AddIncomingFallThrough=*/true);
+ if (SkipCountBB)
+ EmitBlock(SkipCountBB);
+}
+
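A sketch (invented example, built with -fprofile-instr-generate) of why the extra skipcount block is needed: a case label's counter must count only direct hits, so execution falling through from the previous case branches around the counter increment.

    int classify(int v) {
      switch (v) {
      case 0:        // counter #1: hits on 'case 0'
        ++v;         // falls through into 'case 1'
      case 1:        // counter #2: direct hits only; the fall-through edge
                     // jumps past the increment via the skipcount block
        return v;
      default:
        return -1;
      }
    }
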
/// Tries to mark the given function nounwind based on the
/// non-existence of any throwing calls within it. We believe this is
/// lightweight enough to do at -O0.
@@ -691,19 +736,22 @@
DebugInfo = NULL; // disable debug info indefinitely for this function
FunctionArgList Args;
- QualType ResTy = FD->getResultType();
+ QualType ResTy = FD->getReturnType();
CurGD = GD;
- const CXXMethodDecl *MD;
- if ((MD = dyn_cast<CXXMethodDecl>(FD)) && MD->isInstance()) {
+ const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
+ if (MD && MD->isInstance()) {
if (CGM.getCXXABI().HasThisReturn(GD))
ResTy = MD->getThisType(getContext());
- CGM.getCXXABI().BuildInstanceFunctionParams(*this, ResTy, Args);
+ CGM.getCXXABI().buildThisParam(*this, Args);
}
for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i)
Args.push_back(FD->getParamDecl(i));
+ if (MD && (isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD)))
+ CGM.getCXXABI().addImplicitStructorParams(*this, ResTy, Args);
+
SourceRange BodyRange;
if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange();
CurEHLocation = BodyRange.getEnd();
@@ -712,6 +760,7 @@
StartFunction(GD, ResTy, Fn, FnInfo, Args, BodyRange.getBegin());
// Generate the body of the function.
+ PGO.assignRegionCounters(GD.getDecl(), CurFn);
if (isa<CXXDestructorDecl>(FD))
EmitDestructorBody(Args);
else if (isa<CXXConstructorDecl>(FD))
@@ -753,7 +802,7 @@
// If the '}' that terminates a function is reached, and the value of the
// function call is used by the caller, the behavior is undefined.
if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() &&
- !FD->getResultType()->isVoidType() && Builder.GetInsertBlock()) {
+ !FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
if (SanOpts->Return)
EmitCheck(Builder.getFalse(), "missing_return",
EmitCheckSourceLocation(FD->getLocation()),
@@ -771,6 +820,9 @@
// a quick pass now to see if we can.
if (!CurFn->doesNotThrow())
TryMarkNoThrow(CurFn);
+
+ PGO.emitInstrumentationData();
+ PGO.destroyRegionCounters();
}
/// ContainsLabel - Return true if the statement contains a label in it. If
@@ -869,19 +921,25 @@
///
void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
llvm::BasicBlock *TrueBlock,
- llvm::BasicBlock *FalseBlock) {
+ llvm::BasicBlock *FalseBlock,
+ uint64_t TrueCount) {
Cond = Cond->IgnoreParens();
if (const BinaryOperator *CondBOp = dyn_cast<BinaryOperator>(Cond)) {
+
// Handle X && Y in a condition.
if (CondBOp->getOpcode() == BO_LAnd) {
+ RegionCounter Cnt = getPGORegionCounter(CondBOp);
+
// If we have "1 && X", simplify the code. "0 && X" would have constant
// folded if the case was simple enough.
bool ConstantBool = false;
if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
ConstantBool) {
// br(1 && X) -> br(X).
- return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+ Cnt.beginRegion(Builder);
+ return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
+ TrueCount);
}
// If we have "X && 1", simplify the code to use an uncond branch.
@@ -889,33 +947,42 @@
if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
ConstantBool) {
// br(X && 1) -> br(X).
- return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
+ return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
+ TrueCount);
}
// Emit the LHS as a conditional. If the LHS conditional is false, we
// want to jump to the FalseBlock.
llvm::BasicBlock *LHSTrue = createBasicBlock("land.lhs.true");
+      // The counter tells us how often we evaluate the RHS, and all of
+      // TrueCount can be propagated to that branch.
+ uint64_t RHSCount = Cnt.getCount();
ConditionalEvaluation eval(*this);
- EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock);
+ EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount);
EmitBlock(LHSTrue);
// Any temporaries created here are conditional.
+ Cnt.beginRegion(Builder);
eval.begin(*this);
- EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+ EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount);
eval.end(*this);
return;
}
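A worked example with invented numbers for "a && b", matching the code above; the region counter attached to the BinaryOperator counts RHS evaluations, i.e. how often the LHS was true.

    // if (a && b) { ... }
    //   parent region count        = 100  times the condition is reached
    //   Cnt.getCount() (RHSCount)  =  60  times the RHS is evaluated (LHS true)
    //   TrueCount                  =  45  times the whole '&&' is true
    // The LHS branch is emitted with a true count of 60 (every LHS-true
    // execution reaches the RHS), and the RHS branch with the full TrueCount
    // of 45, since the '&&' is true exactly when the RHS is reached and true.
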
if (CondBOp->getOpcode() == BO_LOr) {
+ RegionCounter Cnt = getPGORegionCounter(CondBOp);
+
// If we have "0 || X", simplify the code. "1 || X" would have constant
// folded if the case was simple enough.
bool ConstantBool = false;
if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
!ConstantBool) {
// br(0 || X) -> br(X).
- return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+ Cnt.beginRegion(Builder);
+ return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
+ TrueCount);
}
// If we have "X || 0", simplify the code to use an uncond branch.
@@ -923,20 +990,28 @@
if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
!ConstantBool) {
// br(X || 0) -> br(X).
- return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
+ return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
+ TrueCount);
}
// Emit the LHS as a conditional. If the LHS conditional is true, we
// want to jump to the TrueBlock.
llvm::BasicBlock *LHSFalse = createBasicBlock("lor.lhs.false");
+      // We have the count for entry to the RHS and for the whole expression
+      // being true, so we can divvy up the true count between the short
+      // circuit and the RHS.
+ uint64_t LHSCount = Cnt.getParentCount() - Cnt.getCount();
+ uint64_t RHSCount = TrueCount - LHSCount;
ConditionalEvaluation eval(*this);
- EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse);
+ EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount);
EmitBlock(LHSFalse);
// Any temporaries created here are conditional.
+ Cnt.beginRegion(Builder);
eval.begin(*this);
- EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+ EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount);
+
eval.end(*this);
return;
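The corresponding worked example (again with invented numbers) for "a || b", spelling out the LHSCount/RHSCount arithmetic above:

    // if (a || b) { ... }
    //   Cnt.getParentCount() = 100  times the condition is reached
    //   Cnt.getCount()       =  40  times the RHS is evaluated (LHS false)
    //   TrueCount            =  75  times the whole '||' is true
    //   LHSCount = 100 - 40  =  60  short-circuit successes (LHS true)
    //   RHSCount =  75 - 60  =  15  times the RHS itself is true
    // So the LHS branch gets a true count of 60 and the RHS branch gets 15.
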
@@ -945,8 +1020,13 @@
if (const UnaryOperator *CondUOp = dyn_cast<UnaryOperator>(Cond)) {
// br(!x, t, f) -> br(x, f, t)
- if (CondUOp->getOpcode() == UO_LNot)
- return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock);
+ if (CondUOp->getOpcode() == UO_LNot) {
+ // Negate the count.
+ uint64_t FalseCount = PGO.getCurrentRegionCount() - TrueCount;
+ // Negate the condition and swap the destination blocks.
+ return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock,
+ FalseCount);
+ }
}
if (const ConditionalOperator *CondOp = dyn_cast<ConditionalOperator>(Cond)) {
@@ -954,17 +1034,32 @@
llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true");
llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false");
+ RegionCounter Cnt = getPGORegionCounter(CondOp);
ConditionalEvaluation cond(*this);
- EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock);
+ EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
+
+ // When computing PGO branch weights, we only know the overall count for
+ // the true block. This code is essentially doing tail duplication of the
+ // naive code-gen, introducing new edges for which counts are not
+ // available. Divide the counts proportionally between the LHS and RHS of
+ // the conditional operator.
+ uint64_t LHSScaledTrueCount = 0;
+ if (TrueCount) {
+ double LHSRatio = Cnt.getCount() / (double) Cnt.getParentCount();
+ LHSScaledTrueCount = TrueCount * LHSRatio;
+ }
cond.begin(*this);
EmitBlock(LHSBlock);
- EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock);
+ Cnt.beginRegion(Builder);
+ EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock,
+ LHSScaledTrueCount);
cond.end(*this);
cond.begin(*this);
EmitBlock(RHSBlock);
- EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock);
+ EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock,
+ TrueCount - LHSScaledTrueCount);
cond.end(*this);
return;
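A worked example (invented numbers) of the proportional split described in the comment above, for a branch on "c ? x : y":

    //   Cnt.getParentCount() = 100          times the '?:' is reached
    //   Cnt.getCount()       =  25          times the LHS arm is selected
    //   TrueCount            =  80          times the overall condition is true
    //   LHSRatio             = 25 / 100.0   = 0.25
    //   LHSScaledTrueCount   = 80 * 0.25    = 20   credited to the LHS arm
    //   RHS arm true count   = 80 - 20      = 60
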
@@ -980,9 +1075,15 @@
return;
}
+ // Create branch weights based on the number of times we get here and the
+ // number of times the condition should be true.
+ uint64_t CurrentCount = std::max(PGO.getCurrentRegionCount(), TrueCount);
+ llvm::MDNode *Weights = PGO.createBranchWeights(TrueCount,
+ CurrentCount - TrueCount);
+
// Emit the code with the fully general case.
llvm::Value *CondV = EvaluateExprAsBool(Cond);
- Builder.CreateCondBr(CondV, TrueBlock, FalseBlock);
+ Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights);
}
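For the general fallback, the weights handed to createBranchWeights are just the expected taken/not-taken counts; with invented numbers:

    //   PGO.getCurrentRegionCount() = 100, TrueCount = 70
    //   CurrentCount = max(100, 70) = 100
    //   Weights ~ createBranchWeights(70, 100 - 70) = (70, 30)
    // i.e. the conditional branch is annotated as taken roughly 70% of the time.
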
/// ErrorUnsupported - Print out an error that codegen doesn't support the
@@ -1075,7 +1176,7 @@
getContext().getAsArrayType(Ty))) {
QualType eltType;
llvm::Value *numElts;
- llvm::tie(numElts, eltType) = getVLASize(vlaType);
+ std::tie(numElts, eltType) = getVLASize(vlaType);
SizeVal = numElts;
CharUnits eltSize = getContext().getTypeSizeInChars(eltType);
@@ -1264,7 +1365,7 @@
numElements = vlaSize;
} else {
// It's undefined behavior if this wraps around, so mark it that way.
- // FIXME: Teach -fcatch-undefined-behavior to trap this.
+ // FIXME: Teach -fsanitize=undefined to trap this.
numElements = Builder.CreateNUWMul(numElements, vlaSize);
}
} while ((type = getContext().getAsVariableArrayType(elementType)));
@@ -1308,6 +1409,10 @@
case Type::ObjCObjectPointer:
llvm_unreachable("type class is never variably-modified!");
+ case Type::Adjusted:
+ type = cast<AdjustedType>(ty)->getAdjustedType();
+ break;
+
case Type::Decayed:
type = cast<DecayedType>(ty)->getPointeeType();
break;
@@ -1375,7 +1480,7 @@
case Type::FunctionProto:
case Type::FunctionNoProto:
- type = cast<FunctionType>(ty)->getResultType();
+ type = cast<FunctionType>(ty)->getReturnType();
break;
case Type::Paren:
@@ -1464,12 +1569,10 @@
assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute");
// FIXME We create a new bitcast for every annotation because that's what
// llvm-gcc was doing.
- for (specific_attr_iterator<AnnotateAttr>
- ai = D->specific_attr_begin<AnnotateAttr>(),
- ae = D->specific_attr_end<AnnotateAttr>(); ai != ae; ++ai)
+ for (const auto *I : D->specific_attrs<AnnotateAttr>())
EmitAnnotationCall(CGM.getIntrinsic(llvm::Intrinsic::var_annotation),
Builder.CreateBitCast(V, CGM.Int8PtrTy, V->getName()),
- (*ai)->getAnnotation(), D->getLocation());
+ I->getAnnotation(), D->getLocation());
}
llvm::Value *CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D,
@@ -1479,15 +1582,13 @@
llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation,
CGM.Int8PtrTy);
- for (specific_attr_iterator<AnnotateAttr>
- ai = D->specific_attr_begin<AnnotateAttr>(),
- ae = D->specific_attr_end<AnnotateAttr>(); ai != ae; ++ai) {
+ for (const auto *I : D->specific_attrs<AnnotateAttr>()) {
// FIXME Always emit the cast inst so we can differentiate between
// annotation on the first field of a struct and annotation on the struct
// itself.
if (VTy != CGM.Int8PtrTy)
V = Builder.Insert(new llvm::BitCastInst(V, CGM.Int8PtrTy));
- V = EmitAnnotationCall(F, V, (*ai)->getAnnotation(), D->getLocation());
+ V = EmitAnnotationCall(F, V, I->getAnnotation(), D->getLocation());
V = Builder.CreateBitCast(V, VTy);
}
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index db291e3..5b0653a 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -17,8 +17,9 @@
#include "CGBuilder.h"
#include "CGDebugInfo.h"
#include "CGValue.h"
-#include "EHScopeStack.h"
#include "CodeGenModule.h"
+#include "CodeGenPGO.h"
+#include "EHScopeStack.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
@@ -30,8 +31,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
namespace llvm {
class BasicBlock;
@@ -687,8 +688,8 @@
// act exactly like l-values but are formally required to be
// r-values in C.
return expr->isGLValue() ||
- expr->getType()->isRecordType() ||
- expr->getType()->isFunctionType();
+ expr->getType()->isFunctionType() ||
+ hasAggregateEvaluationKind(expr->getType());
}
static OpaqueValueMappingData bind(CodeGenFunction &CGF,
@@ -827,9 +828,21 @@
};
SmallVector<BreakContinue, 8> BreakContinueStack;
+ CodeGenPGO PGO;
+
+public:
+ /// Get a counter for instrumentation of the region associated with the given
+ /// statement.
+ RegionCounter getPGORegionCounter(const Stmt *S) {
+ return RegionCounter(PGO, S);
+ }
+private:
+
/// SwitchInsn - This is nearest current switch instruction. It is null if
/// current context is not in a switch.
llvm::SwitchInst *SwitchInsn;
+ /// The branch weights of SwitchInsn when doing instrumentation based PGO.
+ SmallVector<uint64_t, 16> *SwitchWeights;
/// CaseRangeBlock - This block holds if condition check for last case
/// statement range in current switch instruction.
@@ -1019,6 +1032,7 @@
void pushLifetimeExtendedDestroy(CleanupKind kind, llvm::Value *addr,
QualType type, Destroyer *destroyer,
bool useEHCleanupForArray);
+ void pushStackRestore(CleanupKind kind, llvm::Value *SPMem);
void emitDestroy(llvm::Value *addr, QualType type, Destroyer *destroyer,
bool useEHCleanupForArray);
llvm::Function *generateDestroyHelper(llvm::Constant *addr, QualType type,
@@ -1137,6 +1151,7 @@
void EmitDestructorBody(FunctionArgList &Args);
void emitImplicitAssignmentOperatorBody(FunctionArgList &Args);
void EmitFunctionBody(FunctionArgList &Args, const Stmt *Body);
+ void EmitBlockWithFallThrough(llvm::BasicBlock *BB, RegionCounter &Cnt);
void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator,
CallArgList &CallArgs);
@@ -1380,6 +1395,10 @@
AggValueSlot::IsNotAliased);
}
+ /// CreateInAllocaTmp - Create a temporary memory object for the given
+ /// aggregate type.
+ AggValueSlot CreateInAllocaTmp(QualType T, const Twine &Name = "inalloca");
+
/// Emit a cast to void* in the appropriate address space.
llvm::Value *EmitCastToVoidPtr(llvm::Value *value);
@@ -1767,7 +1786,8 @@
llvm::GlobalValue::LinkageTypes Linkage);
/// EmitParmDecl - Emit a ParmVarDecl or an ImplicitParamDecl.
- void EmitParmDecl(const VarDecl &D, llvm::Value *Arg, unsigned ArgNo);
+ void EmitParmDecl(const VarDecl &D, llvm::Value *Arg, bool ArgIsPointer,
+ unsigned ArgNo);
/// protectFromPeepholes - Protect a value that we're intending to
/// store to the side, but which will probably be used later, from
@@ -2159,6 +2179,18 @@
llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty);
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+
+ llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
+ unsigned LLVMIntrinsic,
+ unsigned AltLLVMIntrinsic,
+ const char *NameHint,
+ unsigned Modifier,
+ const CallExpr *E,
+ SmallVectorImpl<llvm::Value *> &Ops,
+ llvm::Value *Align = 0);
+ llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
+ unsigned Modifier, llvm::Type *ArgTy,
+ const CallExpr *E);
llvm::Value *EmitNeonCall(llvm::Function *F,
SmallVectorImpl<llvm::Value*> &O,
const char *name,
@@ -2168,6 +2200,20 @@
bool negateForRightShift);
llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt,
llvm::Type *Ty, bool usgn, const char *name);
+ llvm::Value *EmitConcatVectors(llvm::Value *Lo, llvm::Value *Hi,
+ llvm::Type *ArgTy);
+ llvm::Value *EmitExtractHigh(llvm::Value *In, llvm::Type *ResTy);
+ // Helper functions for EmitARM64BuiltinExpr.
+ llvm::Value *vectorWrapScalar8(llvm::Value *Op);
+ llvm::Value *vectorWrapScalar16(llvm::Value *Op);
+ llvm::Value *emitVectorWrappedScalar8Intrinsic(
+ unsigned Int, SmallVectorImpl<llvm::Value *> &Ops, const char *Name);
+ llvm::Value *emitVectorWrappedScalar16Intrinsic(
+ unsigned Int, SmallVectorImpl<llvm::Value *> &Ops, const char *Name);
+ llvm::Value *EmitARM64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitNeon64Call(llvm::Function *F,
+ llvm::SmallVectorImpl<llvm::Value *> &O,
+ const char *name);
llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
@@ -2304,9 +2350,9 @@
/// CreateStaticVarDecl - Create a zero-initialized LLVM global for
/// a static local variable.
- llvm::GlobalVariable *CreateStaticVarDecl(const VarDecl &D,
- const char *Separator,
- llvm::GlobalValue::LinkageTypes Linkage);
+ llvm::Constant *CreateStaticVarDecl(const VarDecl &D,
+ const char *Separator,
+ llvm::GlobalValue::LinkageTypes Linkage);
/// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the
/// global variable that has already been created for it. If the initializer
@@ -2413,8 +2459,10 @@
/// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an
/// if statement) to the specified blocks. Based on the condition, this might
/// try to simplify the codegen of the conditional based on the branch.
+ /// TrueCount should be the number of times we expect the condition to
+ /// evaluate to true based on PGO data.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock,
- llvm::BasicBlock *FalseBlock);
+ llvm::BasicBlock *FalseBlock, uint64_t TrueCount);
/// \brief Emit a description of a type in a format suitable for passing to
/// a runtime sanitizer handler.
@@ -2467,6 +2515,11 @@
llvm::MDNode *getRangeForLoadFromType(QualType Ty);
void EmitReturnOfRValue(RValue RV, QualType Ty);
+ void deferPlaceholderReplacement(llvm::Instruction *Old, llvm::Value *New);
+
+ llvm::SmallVector<std::pair<llvm::Instruction *, llvm::Value *>, 4>
+ DeferredReplacements;
+
/// ExpandTypeFromArgs - Reconstruct a structure of type \arg Ty
/// from function arguments into \arg Dst. See ABIArgInfo::Expand.
///
@@ -2492,69 +2545,81 @@
std::string &ConstraintStr,
SourceLocation Loc);
+public:
/// EmitCallArgs - Emit call arguments for a function.
- /// The CallArgTypeInfo parameter is used for iterating over the known
- /// argument types of the function being called.
- template<typename T>
- void EmitCallArgs(CallArgList& Args, const T* CallArgTypeInfo,
+ template <typename T>
+ void EmitCallArgs(CallArgList &Args, const T *CallArgTypeInfo,
CallExpr::const_arg_iterator ArgBeg,
CallExpr::const_arg_iterator ArgEnd,
bool ForceColumnInfo = false) {
- CGDebugInfo *DI = getDebugInfo();
- SourceLocation CallLoc;
- if (DI) CallLoc = DI->getLocation();
-
- CallExpr::const_arg_iterator Arg = ArgBeg;
-
- // First, use the argument types that the type info knows about
if (CallArgTypeInfo) {
- for (typename T::arg_type_iterator I = CallArgTypeInfo->arg_type_begin(),
- E = CallArgTypeInfo->arg_type_end(); I != E; ++I, ++Arg) {
- assert(Arg != ArgEnd && "Running over edge of argument list!");
- QualType ArgType = *I;
-#ifndef NDEBUG
- QualType ActualArgType = Arg->getType();
- if (ArgType->isPointerType() && ActualArgType->isPointerType()) {
- QualType ActualBaseType =
- ActualArgType->getAs<PointerType>()->getPointeeType();
- QualType ArgBaseType =
- ArgType->getAs<PointerType>()->getPointeeType();
- if (ArgBaseType->isVariableArrayType()) {
- if (const VariableArrayType *VAT =
- getContext().getAsVariableArrayType(ActualBaseType)) {
- if (!VAT->getSizeExpr())
- ActualArgType = ArgType;
- }
- }
- }
- assert(getContext().getCanonicalType(ArgType.getNonReferenceType()).
- getTypePtr() ==
- getContext().getCanonicalType(ActualArgType).getTypePtr() &&
- "type mismatch in call argument!");
-#endif
- EmitCallArg(Args, *Arg, ArgType);
-
- // Each argument expression could modify the debug
- // location. Restore it.
- if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo);
- }
-
- // Either we've emitted all the call args, or we have a call to a
- // variadic function.
- assert((Arg == ArgEnd || CallArgTypeInfo->isVariadic()) &&
- "Extra arguments in non-variadic function!");
-
- }
-
- // If we still have any arguments, emit them using the type of the argument.
- for (; Arg != ArgEnd; ++Arg) {
- EmitCallArg(Args, *Arg, Arg->getType());
-
- // Restore the debug location.
- if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo);
+ EmitCallArgs(Args, CallArgTypeInfo->isVariadic(),
+ CallArgTypeInfo->param_type_begin(),
+ CallArgTypeInfo->param_type_end(), ArgBeg, ArgEnd,
+ ForceColumnInfo);
+ } else {
+ // T::param_type_iterator might not have a default ctor.
+ const QualType *NoIter = 0;
+ EmitCallArgs(Args, /*AllowExtraArguments=*/true, NoIter, NoIter, ArgBeg,
+ ArgEnd, ForceColumnInfo);
}
}
+ template<typename ArgTypeIterator>
+ void EmitCallArgs(CallArgList& Args,
+ bool AllowExtraArguments,
+ ArgTypeIterator ArgTypeBeg,
+ ArgTypeIterator ArgTypeEnd,
+ CallExpr::const_arg_iterator ArgBeg,
+ CallExpr::const_arg_iterator ArgEnd,
+ bool ForceColumnInfo = false) {
+ SmallVector<QualType, 16> ArgTypes;
+ CallExpr::const_arg_iterator Arg = ArgBeg;
+
+ // First, use the argument types that the type info knows about
+ for (ArgTypeIterator I = ArgTypeBeg, E = ArgTypeEnd; I != E; ++I, ++Arg) {
+ assert(Arg != ArgEnd && "Running over edge of argument list!");
+#ifndef NDEBUG
+ QualType ArgType = *I;
+ QualType ActualArgType = Arg->getType();
+ if (ArgType->isPointerType() && ActualArgType->isPointerType()) {
+ QualType ActualBaseType =
+ ActualArgType->getAs<PointerType>()->getPointeeType();
+ QualType ArgBaseType =
+ ArgType->getAs<PointerType>()->getPointeeType();
+ if (ArgBaseType->isVariableArrayType()) {
+ if (const VariableArrayType *VAT =
+ getContext().getAsVariableArrayType(ActualBaseType)) {
+ if (!VAT->getSizeExpr())
+ ActualArgType = ArgType;
+ }
+ }
+ }
+ assert(getContext().getCanonicalType(ArgType.getNonReferenceType()).
+ getTypePtr() ==
+ getContext().getCanonicalType(ActualArgType).getTypePtr() &&
+ "type mismatch in call argument!");
+#endif
+ ArgTypes.push_back(*I);
+ }
+
+    // Either we've emitted all the call args, or we have a call to a variadic
+    // function or some other call that allows extra arguments.
+ assert((Arg == ArgEnd || AllowExtraArguments) &&
+ "Extra arguments in non-variadic function!");
+
+ // If we still have any arguments, emit them using the type of the argument.
+ for (; Arg != ArgEnd; ++Arg)
+ ArgTypes.push_back(Arg->getType());
+
+ EmitCallArgs(Args, ArgTypes, ArgBeg, ArgEnd, ForceColumnInfo);
+ }
+
+ void EmitCallArgs(CallArgList &Args, ArrayRef<QualType> ArgTypes,
+ CallExpr::const_arg_iterator ArgBeg,
+ CallExpr::const_arg_iterator ArgEnd, bool ForceColumnInfo);
+
+private:
const TargetCodeGenInfo &getTargetHooks() const {
return CGM.getTargetCodeGenInfo();
}
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 792fbfc..c26f769 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -19,6 +19,7 @@
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CodeGenFunction.h"
+#include "CodeGenPGO.h"
#include "CodeGenTBAA.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
@@ -40,30 +41,30 @@
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/Mangler.h"
using namespace clang;
using namespace CodeGen;
static const char AnnotationSection[] = "llvm.metadata";
-static CGCXXABI &createCXXABI(CodeGenModule &CGM) {
+static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
switch (CGM.getTarget().getCXXABI().getKind()) {
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericARM:
case TargetCXXABI::iOS:
+ case TargetCXXABI::iOS64:
case TargetCXXABI::GenericItanium:
- return *CreateItaniumCXXABI(CGM);
+ return CreateItaniumCXXABI(CGM);
case TargetCXXABI::Microsoft:
- return *CreateMicrosoftCXXABI(CGM);
+ return CreateMicrosoftCXXABI(CGM);
}
llvm_unreachable("invalid C++ ABI kind");
@@ -77,7 +78,8 @@
ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0),
TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0),
OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0),
- NoObjCARCExceptionsMetadata(0), RRData(0), CFConstantStringClassRef(0),
+ NoObjCARCExceptionsMetadata(0), RRData(0), PGOData(0),
+ CFConstantStringClassRef(0),
ConstantStringClassRef(0), NSConstantStringType(0),
NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0),
BlockObjectDispose(0), BlockDescriptorType(0), GenericBlockLiteralType(0),
@@ -117,7 +119,7 @@
if (SanOpts.Thread ||
(!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0))
TBAA = new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(),
- ABI.getMangleContext());
+ getCXXABI().getMangleContext());
// If debug info or coverage generation is enabled, create the CGDebugInfo
// object.
@@ -131,6 +133,9 @@
if (C.getLangOpts().ObjCAutoRefCount)
ARCData = new ARCEntrypoints();
RRData = new RREntrypoints();
+
+ if (!CodeGenOpts.InstrProfileInput.empty())
+ PGOData = new PGOProfileData(*this, CodeGenOpts.InstrProfileInput);
}
CodeGenModule::~CodeGenModule() {
@@ -138,7 +143,6 @@
delete OpenCLRuntime;
delete CUDARuntime;
delete TheTargetCodeGenInfo;
- delete &ABI;
delete TBAA;
delete DebugInfo;
delete ARCData;
@@ -184,10 +188,14 @@
llvm::Function *OldF = cast<llvm::Function>(Entry);
llvm::Function *NewF = dyn_cast<llvm::Function>(Replacement);
if (!NewF) {
- llvm::ConstantExpr *CE = cast<llvm::ConstantExpr>(Replacement);
- assert(CE->getOpcode() == llvm::Instruction::BitCast ||
- CE->getOpcode() == llvm::Instruction::GetElementPtr);
- NewF = dyn_cast<llvm::Function>(CE->getOperand(0));
+ if (llvm::GlobalAlias *Alias = dyn_cast<llvm::GlobalAlias>(Replacement)) {
+ NewF = dyn_cast<llvm::Function>(Alias->getAliasedGlobal());
+ } else {
+ llvm::ConstantExpr *CE = cast<llvm::ConstantExpr>(Replacement);
+ assert(CE->getOpcode() == llvm::Instruction::BitCast ||
+ CE->getOpcode() == llvm::Instruction::GetElementPtr);
+ NewF = dyn_cast<llvm::Function>(CE->getOperand(0));
+ }
}
// Replace old with new, but keep the old order.
@@ -201,6 +209,9 @@
}
void CodeGenModule::checkAliases() {
+ // Check if the constructed aliases are well formed. It is really unfortunate
+ // that we have to do this in CodeGen, but we only construct mangled names
+ // and aliases during codegen.
bool Error = false;
for (std::vector<GlobalDecl>::iterator I = Aliases.begin(),
E = Aliases.end(); I != E; ++I) {
@@ -211,12 +222,37 @@
llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
llvm::GlobalAlias *Alias = cast<llvm::GlobalAlias>(Entry);
llvm::GlobalValue *GV = Alias->getAliasedGlobal();
- if (GV->isDeclaration()) {
- Error = true;
- getDiags().Report(AA->getLocation(), diag::err_alias_to_undefined);
- } else if (!Alias->resolveAliasedGlobal(/*stopOnWeak*/ false)) {
+ if (!GV) {
Error = true;
getDiags().Report(AA->getLocation(), diag::err_cyclic_alias);
+ } else if (GV->isDeclaration()) {
+ Error = true;
+ getDiags().Report(AA->getLocation(), diag::err_alias_to_undefined);
+ }
+
+    // We have to handle aliases to weak aliases here. LLVM itself disallows
+    // this since the object semantics would not match the IL ones. For
+ // compatibility with gcc we implement it by just pointing the alias
+ // to its aliasee's aliasee. We also warn, since the user is probably
+ // expecting the link to be weak.
+ llvm::Constant *Aliasee = Alias->getAliasee();
+ llvm::GlobalValue *AliaseeGV;
+ if (auto CE = dyn_cast<llvm::ConstantExpr>(Aliasee)) {
+ assert((CE->getOpcode() == llvm::Instruction::BitCast ||
+ CE->getOpcode() == llvm::Instruction::AddrSpaceCast) &&
+ "Unsupported aliasee");
+ AliaseeGV = cast<llvm::GlobalValue>(CE->getOperand(0));
+ } else {
+ AliaseeGV = cast<llvm::GlobalValue>(Aliasee);
+ }
+ if (auto GA = dyn_cast<llvm::GlobalAlias>(AliaseeGV)) {
+ if (GA->mayBeOverridden()) {
+ getDiags().Report(AA->getLocation(), diag::warn_alias_to_weak_alias)
+ << GA->getAliasedGlobal()->getName() << GA->getName();
+ Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ GA->getAliasee(), Alias->getType());
+ Alias->setAliasee(Aliasee);
+ }
}
}
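A sketch of the GCC-compatible input this now accepts with a warning (names invented); per the comment above, the outer alias is retargeted at the weak alias's own aliasee.

    extern "C" {
      void impl(void) {}
      // weak alias to impl
      void middle(void) __attribute__((weak, alias("impl")));
      // alias to a weak alias: warns, and is rewritten to point at 'impl'
      void outer(void) __attribute__((alias("middle")));
    }
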
if (!Error)
@@ -233,6 +269,10 @@
}
}
+void CodeGenModule::clear() {
+ DeferredDeclsToEmit.clear();
+}
+
void CodeGenModule::Release() {
EmitDeferred();
applyReplacements();
@@ -247,7 +287,7 @@
EmitCtorList(GlobalDtors, "llvm.global_dtors");
EmitGlobalAnnotations();
EmitStaticExternCAliases();
- EmitLLVMUsed();
+ emitLLVMUsed();
if (CodeGenOpts.Autolink &&
(Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) {
@@ -332,9 +372,9 @@
Inst->setMetadata(llvm::LLVMContext::MD_tbaa, TBAAInfo);
}
-void CodeGenModule::Error(SourceLocation loc, StringRef error) {
- unsigned diagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, error);
- getDiags().Report(Context.getFullLoc(loc), diagID);
+void CodeGenModule::Error(SourceLocation loc, StringRef message) {
+ unsigned diagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "%0");
+ getDiags().Report(Context.getFullLoc(loc), diagID) << message;
}
/// ErrorUnsupported - Print out an error that codegen doesn't support the
@@ -405,81 +445,13 @@
TLM = GetLLVMTLSModel(CodeGenOpts.getDefaultTLSModel());
// Override the TLS model if it is explicitly specified.
- if (D.hasAttr<TLSModelAttr>()) {
- const TLSModelAttr *Attr = D.getAttr<TLSModelAttr>();
+ if (const TLSModelAttr *Attr = D.getAttr<TLSModelAttr>()) {
TLM = GetLLVMTLSModel(Attr->getModel());
}
GV->setThreadLocalMode(TLM);
}
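The source-level form this corresponds to, as a sketch (variable names invented, ELF-style model string):

    __thread int counter __attribute__((tls_model("initial-exec")));
    // Without the attribute, the model falls back to -ftls-model / the default.
    __thread int plain_counter;
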
-/// Set the symbol visibility of type information (vtable and RTTI)
-/// associated with the given type.
-void CodeGenModule::setTypeVisibility(llvm::GlobalValue *GV,
- const CXXRecordDecl *RD,
- TypeVisibilityKind TVK) const {
- setGlobalVisibility(GV, RD);
-
- if (!CodeGenOpts.HiddenWeakVTables)
- return;
-
- // We never want to drop the visibility for RTTI names.
- if (TVK == TVK_ForRTTIName)
- return;
-
- // We want to drop the visibility to hidden for weak type symbols.
- // This isn't possible if there might be unresolved references
- // elsewhere that rely on this symbol being visible.
-
- // This should be kept roughly in sync with setThunkVisibility
- // in CGVTables.cpp.
-
- // Preconditions.
- if (GV->getLinkage() != llvm::GlobalVariable::LinkOnceODRLinkage ||
- GV->getVisibility() != llvm::GlobalVariable::DefaultVisibility)
- return;
-
- // Don't override an explicit visibility attribute.
- if (RD->getExplicitVisibility(NamedDecl::VisibilityForType))
- return;
-
- switch (RD->getTemplateSpecializationKind()) {
- // We have to disable the optimization if this is an EI definition
- // because there might be EI declarations in other shared objects.
- case TSK_ExplicitInstantiationDefinition:
- case TSK_ExplicitInstantiationDeclaration:
- return;
-
- // Every use of a non-template class's type information has to emit it.
- case TSK_Undeclared:
- break;
-
- // In theory, implicit instantiations can ignore the possibility of
- // an explicit instantiation declaration because there necessarily
- // must be an EI definition somewhere with default visibility. In
- // practice, it's possible to have an explicit instantiation for
- // an arbitrary template class, and linkers aren't necessarily able
- // to deal with mixed-visibility symbols.
- case TSK_ExplicitSpecialization:
- case TSK_ImplicitInstantiation:
- return;
- }
-
- // If there's a key function, there may be translation units
- // that don't have the key function's definition. But ignore
- // this if we're emitting RTTI under -fno-rtti.
- if (!(TVK != TVK_ForRTTI) || LangOpts.RTTI) {
- // FIXME: what should we do if we "lose" the key function during
- // the emission of the file?
- if (Context.getCurrentKeyFunction(RD))
- return;
- }
-
- // Otherwise, drop the visibility to hidden.
- GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
- GV->setUnnamedAddr(true);
-}
-
StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
const NamedDecl *ND = cast<NamedDecl>(GD.getDecl());
@@ -581,18 +553,13 @@
CodeGenModule::getFunctionLinkage(GlobalDecl GD) {
const FunctionDecl *D = cast<FunctionDecl>(GD.getDecl());
- if (isa<CXXDestructorDecl>(D) &&
- getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D),
- GD.getDtorType()))
- return llvm::Function::LinkOnceODRLinkage;
-
GVALinkage Linkage = getContext().GetGVALinkageForFunction(D);
if (Linkage == GVA_Internal)
return llvm::Function::InternalLinkage;
if (D->hasAttr<DLLExportAttr>())
- return llvm::Function::DLLExportLinkage;
+ return llvm::Function::ExternalLinkage;
if (D->hasAttr<WeakAttr>())
return llvm::Function::WeakAnyLinkage;
@@ -622,11 +589,18 @@
// explicit instantiations can occur in multiple translation units
// and must all be equivalent. However, we are not allowed to
// throw away these explicit instantiations.
- if (Linkage == GVA_ExplicitTemplateInstantiation)
+ if (Linkage == GVA_StrongODR)
return !Context.getLangOpts().AppleKext
? llvm::Function::WeakODRLinkage
: llvm::Function::ExternalLinkage;
-
+
+ // Destructor variants in the Microsoft C++ ABI are always linkonce_odr thunks
+ // emitted on an as-needed basis.
+ if (isa<CXXDestructorDecl>(D) &&
+ getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D),
+ GD.getDtorType()))
+ return llvm::Function::LinkOnceODRLinkage;
+
// Otherwise, we have strong external linkage.
assert(Linkage == GVA_StrongExternal);
return llvm::Function::ExternalLinkage;
@@ -686,10 +660,15 @@
// Naked implies noinline: we should not be inlining such functions.
B.addAttribute(llvm::Attribute::Naked);
B.addAttribute(llvm::Attribute::NoInline);
+ } else if (D->hasAttr<OptimizeNoneAttr>()) {
+ // OptimizeNone implies noinline; we should not be inlining such functions.
+ B.addAttribute(llvm::Attribute::OptimizeNone);
+ B.addAttribute(llvm::Attribute::NoInline);
+ } else if (D->hasAttr<NoDuplicateAttr>()) {
+ B.addAttribute(llvm::Attribute::NoDuplicate);
} else if (D->hasAttr<NoInlineAttr>()) {
B.addAttribute(llvm::Attribute::NoInline);
- } else if ((D->hasAttr<AlwaysInlineAttr>() ||
- D->hasAttr<ForceInlineAttr>()) &&
+ } else if (D->hasAttr<AlwaysInlineAttr>() &&
!F->getAttributes().hasAttribute(llvm::AttributeSet::FunctionIndex,
llvm::Attribute::NoInline)) {
// (noinline wins over always_inline, and we can't specify both in IR)
@@ -704,8 +683,16 @@
if (D->hasAttr<MinSizeAttr>())
B.addAttribute(llvm::Attribute::MinSize);
+ if (D->hasAttr<OptimizeNoneAttr>()) {
+ // OptimizeNone wins over OptimizeForSize and MinSize.
+ B.removeAttribute(llvm::Attribute::OptimizeForSize);
+ B.removeAttribute(llvm::Attribute::MinSize);
+ }
+
if (LangOpts.getStackProtector() == LangOptions::SSPOn)
B.addAttribute(llvm::Attribute::StackProtect);
+ else if (LangOpts.getStackProtector() == LangOptions::SSPStrong)
+ B.addAttribute(llvm::Attribute::StackProtectStrong);
else if (LangOpts.getStackProtector() == LangOptions::SSPReq)
B.addAttribute(llvm::Attribute::StackProtectReq);
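A sketch of source that reaches the new optnone handling above (function name invented): optnone implies noinline, and per the code above it also strips any OptimizeForSize/MinSize that -Os or the minsize attribute would add; separately, -fstack-protector-strong now maps to the StackProtectStrong IR attribute.

    __attribute__((optnone)) int keep_debuggable(int x) {
      // Compiled essentially unoptimized even under -O2 or -Os.
      return x + 1;
    }
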
@@ -751,7 +738,7 @@
GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
if (D->hasAttr<UsedAttr>())
- AddUsedGlobal(GV);
+ addUsedGlobal(GV);
if (const SectionAttr *SA = D->getAttr<SectionAttr>())
GV->setSection(SA->getName());
@@ -788,7 +775,12 @@
if (!IsIncompleteFunction)
SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F);
- if (getCXXABI().HasThisReturn(GD)) {
+ // Add the Returned attribute for "this", except for iOS 5 and earlier
+ // where substantial code, including the libstdc++ dylib, was compiled with
+ // GCC and does not actually return "this".
+ if (getCXXABI().HasThisReturn(GD) &&
+ !(getTarget().getTriple().isiOS() &&
+ getTarget().getTriple().isOSVersionLT(6))) {
assert(!F->arg_empty() &&
F->arg_begin()->getType()
->canLosslesslyBitCastTo(F->getReturnType()) &&
@@ -800,7 +792,8 @@
// overridden by a definition.
if (FD->hasAttr<DLLImportAttr>()) {
- F->setLinkage(llvm::Function::DLLImportLinkage);
+ F->setLinkage(llvm::Function::ExternalLinkage);
+ F->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
} else if (FD->hasAttr<WeakAttr>() ||
FD->isWeakImported()) {
// "extern_weak" is overloaded in LLVM; we probably should have
@@ -808,6 +801,8 @@
F->setLinkage(llvm::Function::ExternalWeakLinkage);
} else {
F->setLinkage(llvm::Function::ExternalLinkage);
+ if (FD->hasAttr<DLLExportAttr>())
+ F->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
LinkageInfo LV = FD->getLinkageAndVisibility();
if (LV.getLinkage() == ExternalLinkage && LV.isVisibilityExplicit()) {
@@ -825,39 +820,51 @@
llvm::Attribute::NoBuiltin);
}
-void CodeGenModule::AddUsedGlobal(llvm::GlobalValue *GV) {
+void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) {
assert(!GV->isDeclaration() &&
"Only globals with definition can force usage.");
LLVMUsed.push_back(GV);
}
-void CodeGenModule::EmitLLVMUsed() {
+void CodeGenModule::addCompilerUsedGlobal(llvm::GlobalValue *GV) {
+ assert(!GV->isDeclaration() &&
+ "Only globals with definition can force usage.");
+ LLVMCompilerUsed.push_back(GV);
+}
+
+static void emitUsed(CodeGenModule &CGM, StringRef Name,
+ std::vector<llvm::WeakVH> &List) {
// Don't create llvm.used if there is no need.
- if (LLVMUsed.empty())
+ if (List.empty())
return;
- // Convert LLVMUsed to what ConstantArray needs.
+ // Convert List to what ConstantArray needs.
SmallVector<llvm::Constant*, 8> UsedArray;
- UsedArray.resize(LLVMUsed.size());
- for (unsigned i = 0, e = LLVMUsed.size(); i != e; ++i) {
+ UsedArray.resize(List.size());
+ for (unsigned i = 0, e = List.size(); i != e; ++i) {
UsedArray[i] =
- llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(&*LLVMUsed[i]),
- Int8PtrTy);
+ llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(&*List[i]),
+ CGM.Int8PtrTy);
}
if (UsedArray.empty())
return;
- llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size());
+ llvm::ArrayType *ATy = llvm::ArrayType::get(CGM.Int8PtrTy, UsedArray.size());
llvm::GlobalVariable *GV =
- new llvm::GlobalVariable(getModule(), ATy, false,
+ new llvm::GlobalVariable(CGM.getModule(), ATy, false,
llvm::GlobalValue::AppendingLinkage,
llvm::ConstantArray::get(ATy, UsedArray),
- "llvm.used");
+ Name);
GV->setSection("llvm.metadata");
}
+void CodeGenModule::emitLLVMUsed() {
+ emitUsed(*this, "llvm.used", LLVMUsed);
+ emitUsed(*this, "llvm.compiler.used", LLVMCompilerUsed);
+}
+
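The split mirrors LLVM's two lists: objects the user forces alive land in llvm.used, while compiler-internal references go to the new llvm.compiler.used. A minimal source-level sketch of the first case (names invented):

    // Reaches addUsedGlobal() via the 'used' attribute; the array is kept even
    // though nothing in this TU references it.
    __attribute__((used)) static const char build_tag[] = "v1.2.3-sketch";
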
void CodeGenModule::AppendLinkerOptions(StringRef Opts) {
llvm::Value *MDOpts = llvm::MDString::get(getLLVMContext(), Opts);
LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts));
@@ -1002,31 +1009,23 @@
// Stop if we're out of both deferred v-tables and deferred declarations.
if (DeferredDeclsToEmit.empty()) break;
- GlobalDecl D = DeferredDeclsToEmit.back();
+ DeferredGlobal &G = DeferredDeclsToEmit.back();
+ GlobalDecl D = G.GD;
+ llvm::GlobalValue *GV = G.GV;
DeferredDeclsToEmit.pop_back();
+ assert(GV == GetGlobalValue(getMangledName(D)));
// Check to see if we've already emitted this. This is necessary
// for a couple of reasons: first, decls can end up in the
// deferred-decls queue multiple times, and second, decls can end
// up with definitions in unusual ways (e.g. by an extern inline
// function acquiring a strong function redefinition). Just
// ignore these cases.
- //
- // TODO: That said, looking this up multiple times is very wasteful.
- StringRef Name = getMangledName(D);
- llvm::GlobalValue *CGRef = GetGlobalValue(Name);
- assert(CGRef && "Deferred decl wasn't referenced?");
-
- if (!CGRef->isDeclaration())
- continue;
-
- // GlobalAlias::isDeclaration() defers to the aliasee, but for our
- // purposes an alias counts as a definition.
- if (isa<llvm::GlobalAlias>(CGRef))
+    if (!GV->isDeclaration())
continue;
// Otherwise, emit the definition and move on to the next one.
- EmitGlobalDefinition(D);
+ EmitGlobalDefinition(D, GV);
}
}
@@ -1096,10 +1095,8 @@
llvm::GlobalValue *GV) {
assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute");
// Get the struct elements for these annotations.
- for (specific_attr_iterator<AnnotateAttr>
- ai = D->specific_attr_begin<AnnotateAttr>(),
- ae = D->specific_attr_end<AnnotateAttr>(); ai != ae; ++ai)
- Annotations.push_back(EmitAnnotateAttr(GV, *ai, D->getLocation()));
+ for (const auto *I : D->specific_attrs<AnnotateAttr>())
+ Annotations.push_back(EmitAnnotateAttr(GV, I, D->getLocation()));
}
bool CodeGenModule::MayDeferGeneration(const ValueDecl *Global) {
@@ -1196,12 +1193,14 @@
if (!FD->doesDeclarationForceExternallyVisibleDefinition())
return;
- const FunctionDecl *InlineDefinition = 0;
- FD->getBody(InlineDefinition);
-
StringRef MangledName = getMangledName(GD);
- DeferredDecls.erase(MangledName);
- EmitGlobalDefinition(InlineDefinition);
+
+ // Compute the function info and LLVM type.
+ const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
+ llvm::Type *Ty = getTypes().GetFunctionType(FI);
+
+ GetOrCreateLLVMFunction(MangledName, Ty, GD, /*ForVTable=*/false,
+ /*DontDefer=*/false);
return;
}
} else {
@@ -1231,8 +1230,8 @@
// If the value has already been used, add it directly to the
// DeferredDeclsToEmit list.
StringRef MangledName = getMangledName(GD);
- if (GetGlobalValue(MangledName))
- DeferredDeclsToEmit.push_back(GD);
+ if (llvm::GlobalValue *GV = GetGlobalValue(MangledName))
+ addDeferredDeclToEmit(GV, GD);
else {
// Otherwise, remember that we saw a deferred decl with this name. The
// first use of the mangled name will cause it to move into
@@ -1301,8 +1300,7 @@
if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage)
return true;
const FunctionDecl *F = cast<FunctionDecl>(GD.getDecl());
- if (CodeGenOpts.OptimizationLevel == 0 &&
- !F->hasAttr<AlwaysInlineAttr>() && !F->hasAttr<ForceInlineAttr>())
+ if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr<AlwaysInlineAttr>())
return false;
// PR9614. Avoid cases where the source code is lying to us. An available
// externally function should have an equivalent function somewhere else,
@@ -1329,7 +1327,7 @@
}
}
-void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD) {
+void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
const ValueDecl *D = cast<ValueDecl>(GD.getDecl());
PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(),
@@ -1351,7 +1349,7 @@
else if (const CXXDestructorDecl *DD =dyn_cast<CXXDestructorDecl>(Method))
EmitCXXDestructor(DD, GD.getDtorType());
else
- EmitGlobalFunctionDefinition(GD);
+ EmitGlobalFunctionDefinition(GD, GV);
if (Method->isVirtual())
getVTables().EmitThunks(GD);
@@ -1359,7 +1357,7 @@
return;
}
- return EmitGlobalFunctionDefinition(GD);
+ return EmitGlobalFunctionDefinition(GD, GV);
}
if (const VarDecl *VD = dyn_cast<VarDecl>(D))
@@ -1379,6 +1377,7 @@
CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName,
llvm::Type *Ty,
GlobalDecl GD, bool ForVTable,
+ bool DontDefer,
llvm::AttributeSet ExtraAttrs) {
const Decl *D = GD.getDecl();
@@ -1398,14 +1397,6 @@
return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo());
}
- // All MSVC dtors other than the base dtor are linkonce_odr and delegate to
- // each other bottoming out with the base dtor. Therefore we emit non-base
- // dtors on usage, even if there is no dtor definition in the TU.
- if (D && isa<CXXDestructorDecl>(D) &&
- getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D),
- GD.getDtorType()))
- DeferredDeclsToEmit.push_back(GD);
-
// This function doesn't have a complete type (for example, the return
// type is an incomplete struct). Use a fake type instead, and make
// sure not to try to set attributes.
@@ -1433,50 +1424,64 @@
B));
}
- // This is the first use or definition of a mangled name. If there is a
- // deferred decl with this name, remember that we need to emit it at the end
- // of the file.
- llvm::StringMap<GlobalDecl>::iterator DDI = DeferredDecls.find(MangledName);
- if (DDI != DeferredDecls.end()) {
- // Move the potentially referenced deferred decl to the DeferredDeclsToEmit
- // list, and remove it from DeferredDecls (since we don't need it anymore).
- DeferredDeclsToEmit.push_back(DDI->second);
- DeferredDecls.erase(DDI);
+ if (!DontDefer) {
+ // All MSVC dtors other than the base dtor are linkonce_odr and delegate to
+ // each other bottoming out with the base dtor. Therefore we emit non-base
+ // dtors on usage, even if there is no dtor definition in the TU.
+ if (D && isa<CXXDestructorDecl>(D) &&
+ getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D),
+ GD.getDtorType()))
+ addDeferredDeclToEmit(F, GD);
- // Otherwise, if this is a sized deallocation function, emit a weak definition
- // for it at the end of the translation unit.
- } else if (D && cast<FunctionDecl>(D)
- ->getCorrespondingUnsizedGlobalDeallocationFunction()) {
- DeferredDeclsToEmit.push_back(GD);
+ // This is the first use or definition of a mangled name. If there is a
+ // deferred decl with this name, remember that we need to emit it at the end
+ // of the file.
+ llvm::StringMap<GlobalDecl>::iterator DDI = DeferredDecls.find(MangledName);
+ if (DDI != DeferredDecls.end()) {
+ // Move the potentially referenced deferred decl to the
+ // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we
+ // don't need it anymore).
+ addDeferredDeclToEmit(F, DDI->second);
+ DeferredDecls.erase(DDI);
- // Otherwise, there are cases we have to worry about where we're
- // using a declaration for which we must emit a definition but where
- // we might not find a top-level definition:
- // - member functions defined inline in their classes
- // - friend functions defined inline in some class
- // - special member functions with implicit definitions
- // If we ever change our AST traversal to walk into class methods,
- // this will be unnecessary.
- //
- // We also don't emit a definition for a function if it's going to be an entry
- // in a vtable, unless it's already marked as used.
- } else if (getLangOpts().CPlusPlus && D) {
- // Look for a declaration that's lexically in a record.
- const FunctionDecl *FD = cast<FunctionDecl>(D);
- FD = FD->getMostRecentDecl();
- do {
- if (isa<CXXRecordDecl>(FD->getLexicalDeclContext())) {
- if (FD->isImplicit() && !ForVTable) {
- assert(FD->isUsed() && "Sema didn't mark implicit function as used!");
- DeferredDeclsToEmit.push_back(GD.getWithDecl(FD));
- break;
- } else if (FD->doesThisDeclarationHaveABody()) {
- DeferredDeclsToEmit.push_back(GD.getWithDecl(FD));
- break;
+      // Otherwise, if this is a sized deallocation function, emit a weak
+      // definition for it at the end of the translation unit.
+ } else if (D && cast<FunctionDecl>(D)
+ ->getCorrespondingUnsizedGlobalDeallocationFunction()) {
+ addDeferredDeclToEmit(F, GD);
+
+ // Otherwise, there are cases we have to worry about where we're
+ // using a declaration for which we must emit a definition but where
+ // we might not find a top-level definition:
+ // - member functions defined inline in their classes
+ // - friend functions defined inline in some class
+ // - special member functions with implicit definitions
+ // If we ever change our AST traversal to walk into class methods,
+ // this will be unnecessary.
+ //
+      // We also don't emit a definition for a function if it's going to be an
+      // entry in a vtable, unless it's already marked as used.
+ } else if (getLangOpts().CPlusPlus && D) {
+ // Look for a declaration that's lexically in a record.
+ const FunctionDecl *FD = cast<FunctionDecl>(D);
+ FD = FD->getMostRecentDecl();
+ do {
+ if (isa<CXXRecordDecl>(FD->getLexicalDeclContext())) {
+ if (FD->isImplicit() && !ForVTable) {
+ assert(FD->isUsed() &&
+ "Sema didn't mark implicit function as used!");
+ addDeferredDeclToEmit(F, GD.getWithDecl(FD));
+ break;
+ } else if (FD->doesThisDeclarationHaveABody()) {
+ addDeferredDeclToEmit(F, GD.getWithDecl(FD));
+ break;
+ }
}
- }
- FD = FD->getPreviousDecl();
- } while (FD);
+ FD = FD->getPreviousDecl();
+ } while (FD);
+ }
}
// Make sure the result is of the requested type.
@@ -1494,13 +1499,14 @@
/// create it (this occurs when we see a definition of the function).
llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD,
llvm::Type *Ty,
- bool ForVTable) {
+ bool ForVTable,
+ bool DontDefer) {
// If there was no specific requested type, just convert it now.
if (!Ty)
Ty = getTypes().ConvertType(cast<ValueDecl>(GD.getDecl())->getType());
StringRef MangledName = getMangledName(GD);
- return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable);
+ return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer);
}
/// CreateRuntimeFunction - Create a new runtime function with the specified
@@ -1509,9 +1515,9 @@
CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy,
StringRef Name,
llvm::AttributeSet ExtraAttrs) {
- llvm::Constant *C
- = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
- ExtraAttrs);
+ llvm::Constant *C =
+ GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
+ /*DontDefer=*/false, ExtraAttrs);
if (llvm::Function *F = dyn_cast<llvm::Function>(C))
if (F->empty())
F->setCallingConv(getRuntimeCC());
@@ -1571,6 +1577,13 @@
return llvm::ConstantExpr::getBitCast(Entry, Ty);
}
+ unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace());
+ llvm::GlobalVariable *GV =
+ new llvm::GlobalVariable(getModule(), Ty->getElementType(), false,
+ llvm::GlobalValue::ExternalLinkage,
+ 0, MangledName, 0,
+ llvm::GlobalVariable::NotThreadLocal, AddrSpace);
+
// This is the first use or definition of a mangled name. If there is a
// deferred decl with this name, remember that we need to emit it at the end
// of the file.
@@ -1578,17 +1591,10 @@
if (DDI != DeferredDecls.end()) {
// Move the potentially referenced deferred decl to the DeferredDeclsToEmit
// list, and remove it from DeferredDecls (since we don't need it anymore).
- DeferredDeclsToEmit.push_back(DDI->second);
+ addDeferredDeclToEmit(GV, DDI->second);
DeferredDecls.erase(DDI);
}
- unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace());
- llvm::GlobalVariable *GV =
- new llvm::GlobalVariable(getModule(), Ty->getElementType(), false,
- llvm::GlobalValue::ExternalLinkage,
- 0, MangledName, 0,
- llvm::GlobalVariable::NotThreadLocal, AddrSpace);
-
// Handle things which are present even on external declarations.
if (D) {
// FIXME: This code is overly simple and should be merged with other global
@@ -1600,9 +1606,10 @@
if (LV.getLinkage() != ExternalLinkage) {
// Don't set internal linkage on declarations.
} else {
- if (D->hasAttr<DLLImportAttr>())
- GV->setLinkage(llvm::GlobalValue::DLLImportLinkage);
- else if (D->hasAttr<WeakAttr>() || D->isWeakImported())
+ if (D->hasAttr<DLLImportAttr>()) {
+ GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ } else if (D->hasAttr<WeakAttr>() || D->isWeakImported())
GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
// Set visibility on a declaration only if it's explicit.
@@ -1627,6 +1634,12 @@
if (AddrSpace != Ty->getAddressSpace())
return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty);
+ if (getTarget().getTriple().getArch() == llvm::Triple::xcore &&
+ D->getLanguageLinkage() == CLanguageLinkage &&
+ D->getType().isConstant(Context) &&
+ isExternallyVisible(D->getLinkageAndVisibility().getLinkage()))
+ GV->setSection(".cp.rodata");
+
return GV;
}
@@ -1875,6 +1888,10 @@
llvm::GlobalValue::LinkageTypes Linkage =
GetLLVMLinkageVarDefinition(D, GV->isConstant());
GV->setLinkage(Linkage);
+ if (D->hasAttr<DLLImportAttr>())
+ GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
+ else if (D->hasAttr<DLLExportAttr>())
+ GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
// If required by the ABI, give definitions of static data members with inline
// initializers linkonce_odr linkage.
@@ -1917,9 +1934,9 @@
if (Linkage == GVA_Internal)
return llvm::Function::InternalLinkage;
else if (D->hasAttr<DLLImportAttr>())
- return llvm::Function::DLLImportLinkage;
+ return llvm::Function::ExternalLinkage;
else if (D->hasAttr<DLLExportAttr>())
- return llvm::Function::DLLExportLinkage;
+ return llvm::Function::ExternalLinkage;
else if (D->hasAttr<SelectAnyAttr>()) {
// selectany symbols are externally visible, so use weak instead of
// linkonce. MSVC optimizes away references to const selectany globals, so
@@ -1931,15 +1948,14 @@
return llvm::GlobalVariable::WeakODRLinkage;
else
return llvm::GlobalVariable::WeakAnyLinkage;
- } else if (Linkage == GVA_TemplateInstantiation ||
- Linkage == GVA_ExplicitTemplateInstantiation)
+ } else if (Linkage == GVA_TemplateInstantiation || Linkage == GVA_StrongODR)
return llvm::GlobalVariable::WeakODRLinkage;
else if (!getLangOpts().CPlusPlus &&
- ((!CodeGenOpts.NoCommon && !D->getAttr<NoCommonAttr>()) ||
- D->getAttr<CommonAttr>()) &&
+ ((!CodeGenOpts.NoCommon && !D->hasAttr<NoCommonAttr>()) ||
+ D->hasAttr<CommonAttr>()) &&
!D->hasExternalStorage() && !D->getInit() &&
- !D->getAttr<SectionAttr>() && !D->getTLSKind() &&
- !D->getAttr<WeakImportAttr>()) {
+ !D->hasAttr<SectionAttr>() && !D->getTLSKind() &&
+ !D->hasAttr<WeakImportAttr>()) {
// Thread local vars aren't considered common linkage.
return llvm::GlobalVariable::CommonLinkage;
} else if (D->getTLSKind() == VarDecl::TLS_Dynamic &&
@@ -1965,7 +1981,7 @@
for (llvm::Value::use_iterator ui = old->use_begin(), ue = old->use_end();
ui != ue; ) {
llvm::Value::use_iterator use = ui++; // Increment before the use is erased.
- llvm::User *user = *use;
+ llvm::User *user = use->getUser();
// Recognize and replace uses of bitcasts. Most calls to
// unprototyped functions will use bitcasts.
@@ -1978,7 +1994,7 @@
// Recognize calls to the function.
llvm::CallSite callSite(user);
if (!callSite) continue;
- if (!callSite.isCallee(use)) continue;
+ if (!callSite.isCallee(&*use)) continue;
// If the return types don't match exactly, then we can't
// transform this call unless it's dead.
@@ -2087,7 +2103,8 @@
EmitTopLevelDecl(VD);
}
-void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD) {
+void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
+ llvm::GlobalValue *GV) {
const FunctionDecl *D = cast<FunctionDecl>(GD.getDecl());
// Compute the function info and LLVM type.
@@ -2095,7 +2112,9 @@
llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
// Get or create the prototype for the function.
- llvm::Constant *Entry = GetAddrOfFunction(GD, Ty);
+ llvm::Constant *Entry =
+ GV ? GV
+ : GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer*/ true);
// Strip off a bitcast if we got one back.
if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Entry)) {
@@ -2174,6 +2193,10 @@
AddGlobalDtor(Fn, DA->getPriority());
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, Fn);
+
+ llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this);
+ if (PGOInit)
+ AddGlobalCtor(PGOInit, 0);
}
void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
@@ -2235,9 +2258,9 @@
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
// The dllexport attribute is ignored for undefined symbols.
if (FD->hasBody())
- GA->setLinkage(llvm::Function::DLLExportLinkage);
+ GA->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
} else {
- GA->setLinkage(llvm::Function::DLLExportLinkage);
+ GA->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
}
} else if (D->hasAttr<WeakAttr>() ||
D->hasAttr<WeakRefAttr>() ||
@@ -2355,30 +2378,25 @@
C = llvm::ConstantDataArray::getString(VMContext, Entry.getKey());
}
- llvm::GlobalValue::LinkageTypes Linkage;
- if (isUTF16)
- // FIXME: why do utf strings get "_" labels instead of "L" labels?
- Linkage = llvm::GlobalValue::InternalLinkage;
- else
- // FIXME: With OS X ld 123.2 (xcode 4) and LTO we would get a linker error
- // when using private linkage. It is not clear if this is a bug in ld
- // or a reasonable new restriction.
- Linkage = llvm::GlobalValue::LinkerPrivateLinkage;
-
// Note: -fwritable-strings doesn't make the backing store strings of
// CFStrings writable. (See <rdar://problem/10657500>)
llvm::GlobalVariable *GV =
- new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true,
- Linkage, C, ".str");
+ new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage, C, ".str");
GV->setUnnamedAddr(true);
// Don't enforce the target's minimum global alignment, since the only use
// of the string is via this class initializer.
+  // FIXME: We set the section explicitly to avoid a bug in ld64 224.1.
+  // Without it, LLVM can merge the string with a non-unnamed_addr one during
+  // LTO; doing that changes the section it ends up in, which surprises ld64.
if (isUTF16) {
CharUnits Align = getContext().getTypeAlignInChars(getContext().ShortTy);
GV->setAlignment(Align.getQuantity());
+ GV->setSection("__TEXT,__ustring");
} else {
CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy);
GV->setAlignment(Align.getQuantity());
+ GV->setSection("__TEXT,__cstring,cstring_literals");
}
// String.
@@ -2397,23 +2415,12 @@
GV = new llvm::GlobalVariable(getModule(), C->getType(), true,
llvm::GlobalVariable::PrivateLinkage, C,
"_unnamed_cfstring_");
- if (const char *Sect = getTarget().getCFStringSection())
- GV->setSection(Sect);
+ GV->setSection("__DATA,__cfstring");
Entry.setValue(GV);
return GV;
}
-static RecordDecl *
-CreateRecordDecl(const ASTContext &Ctx, RecordDecl::TagKind TK,
- DeclContext *DC, IdentifierInfo *Id) {
- SourceLocation Loc;
- if (Ctx.getLangOpts().CPlusPlus)
- return CXXRecordDecl::Create(Ctx, TK, DC, Loc, Loc, Id);
- else
- return RecordDecl::Create(Ctx, TK, DC, Loc, Loc, Id);
-}
-
llvm::Constant *
CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) {
unsigned StringLength = 0;
@@ -2456,9 +2463,7 @@
if (!NSConstantStringType) {
// Construct the type for a constant NSString.
- RecordDecl *D = CreateRecordDecl(Context, TTK_Struct,
- Context.getTranslationUnitDecl(),
- &Context.Idents.get("__builtin_NSString"));
+ RecordDecl *D = Context.buildImplicitRecord("__builtin_NSString");
D->startDefinition();
QualType FieldTypes[3];
@@ -2521,12 +2526,13 @@
GV = new llvm::GlobalVariable(getModule(), C->getType(), true,
llvm::GlobalVariable::PrivateLinkage, C,
"_unnamed_nsstring_");
+ const char *NSStringSection = "__OBJC,__cstring_object,regular,no_dead_strip";
+ const char *NSStringNonFragileABISection =
+ "__DATA,__objc_stringobj,regular,no_dead_strip";
// FIXME. Fix section.
- if (const char *Sect =
- LangOpts.ObjCRuntime.isNonFragile()
- ? getTarget().getNSStringNonFragileABISection()
- : getTarget().getNSStringSection())
- GV->setSection(Sect);
+ GV->setSection(LangOpts.ObjCRuntime.isNonFragile()
+ ? NSStringNonFragileABISection
+ : NSStringSection);
Entry.setValue(GV);
return GV;
@@ -2534,9 +2540,7 @@
QualType CodeGenModule::getObjCFastEnumerationStateType() {
if (ObjCFastEnumerationStateType.isNull()) {
- RecordDecl *D = CreateRecordDecl(Context, TTK_Struct,
- Context.getTranslationUnitDecl(),
- &Context.Idents.get("__objcFastEnumerationState"));
+ RecordDecl *D = Context.buildImplicitRecord("__objcFastEnumerationState");
D->startDefinition();
QualType FieldTypes[] = {
@@ -2613,25 +2617,67 @@
llvm::Constant *
CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) {
CharUnits Align = getContext().getAlignOfGlobalVarInChars(S->getType());
- if (S->isAscii() || S->isUTF8()) {
- SmallString<64> Str(S->getString());
-
- // Resize the string to the right size, which is indicated by its type.
- const ConstantArrayType *CAT = Context.getAsConstantArrayType(S->getType());
- Str.resize(CAT->getSize().getZExtValue());
- return GetAddrOfConstantString(Str, /*GlobalName*/ 0, Align.getQuantity());
+
+ llvm::StringMapEntry<llvm::GlobalVariable *> *Entry = nullptr;
+ llvm::GlobalVariable *GV = nullptr;
+ if (!LangOpts.WritableStrings) {
+ llvm::StringMap<llvm::GlobalVariable *> *ConstantStringMap = nullptr;
+ switch (S->getCharByteWidth()) {
+ case 1:
+ ConstantStringMap = &Constant1ByteStringMap;
+ break;
+ case 2:
+ ConstantStringMap = &Constant2ByteStringMap;
+ break;
+ case 4:
+ ConstantStringMap = &Constant4ByteStringMap;
+ break;
+ default:
+ llvm_unreachable("unhandled byte width!");
+ }
+ Entry = &ConstantStringMap->GetOrCreateValue(S->getBytes());
+ GV = Entry->getValue();
}
- // FIXME: the following does not memoize wide strings.
- llvm::Constant *C = GetConstantArrayFromStringLiteral(S);
- llvm::GlobalVariable *GV =
- new llvm::GlobalVariable(getModule(),C->getType(),
- !LangOpts.WritableStrings,
- llvm::GlobalValue::PrivateLinkage,
- C,".str");
+ if (!GV) {
+ SmallString<256> MangledNameBuffer;
+ StringRef GlobalVariableName;
+ llvm::GlobalValue::LinkageTypes LT;
- GV->setAlignment(Align.getQuantity());
- GV->setUnnamedAddr(true);
+ // Mangle the string literal if the ABI allows for it. However, we cannot
+ // do this if we are compiling with ASan or -fwritable-strings because they
+ // rely on strings having normal linkage.
+ if (!LangOpts.WritableStrings && !SanOpts.Address &&
+ getCXXABI().getMangleContext().shouldMangleStringLiteral(S)) {
+ llvm::raw_svector_ostream Out(MangledNameBuffer);
+ getCXXABI().getMangleContext().mangleStringLiteral(S, Out);
+ Out.flush();
+
+ LT = llvm::GlobalValue::LinkOnceODRLinkage;
+ GlobalVariableName = MangledNameBuffer;
+ } else {
+      LT = llvm::GlobalValue::PrivateLinkage;
+ GlobalVariableName = ".str";
+ }
+
+ // OpenCL v1.2 s6.5.3: a string literal is in the constant address space.
+ unsigned AddrSpace = 0;
+ if (getLangOpts().OpenCL)
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant);
+
+ llvm::Constant *C = GetConstantArrayFromStringLiteral(S);
+ GV = new llvm::GlobalVariable(
+ getModule(), C->getType(), !LangOpts.WritableStrings, LT, C,
+ GlobalVariableName, /*InsertBefore=*/nullptr,
+ llvm::GlobalVariable::NotThreadLocal, AddrSpace);
+ GV->setUnnamedAddr(true);
+ if (Entry)
+ Entry->setValue(GV);
+ }
+
+ if (Align.getQuantity() > GV->getAlignment())
+ GV->setAlignment(Align.getQuantity());
+
return GV;
}
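
For context on the split cache above: string literals are now memoized per code-unit width, so literals with identical bytes but different element types never share a global. A minimal sketch (not from the source) of the three cases the switch on getCharByteWidth() distinguishes:

// Hypothetical translation unit: each literal lands in a different map
// because its code-unit width differs, even though the characters match.
const char     *Narrow = "hi";   // width 1 -> Constant1ByteStringMap
const char16_t *Wide   = u"hi";  // width 2 -> Constant2ByteStringMap
const char32_t *Wider  = U"hi";  // width 4 -> Constant4ByteStringMap
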
@@ -2656,7 +2702,7 @@
llvm::Constant *C =
llvm::ConstantDataArray::getString(CGM.getLLVMContext(), str, false);
- // OpenCL v1.1 s6.5.3: a string literal is in the constant address space.
+ // OpenCL v1.2 s6.5.3: a string literal is in the constant address space.
unsigned AddrSpace = 0;
if (CGM.getLangOpts().OpenCL)
AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
@@ -2695,7 +2741,7 @@
return GenerateStringLiteral(Str, false, *this, GlobalName, Alignment);
llvm::StringMapEntry<llvm::GlobalVariable *> &Entry =
- ConstantStringMap.GetOrCreateValue(Str);
+ Constant1ByteStringMap.GetOrCreateValue(Str);
if (llvm::GlobalVariable *GV = Entry.getValue()) {
if (Alignment > GV->getAlignment()) {
@@ -2793,10 +2839,7 @@
/// properties for an implementation.
void CodeGenModule::EmitObjCPropertyImplementations(const
ObjCImplementationDecl *D) {
- for (ObjCImplementationDecl::propimpl_iterator
- i = D->propimpl_begin(), e = D->propimpl_end(); i != e; ++i) {
- ObjCPropertyImplDecl *PID = *i;
-
+ for (const auto *PID : D->property_impls()) {
// Dynamic is just for type-checking.
if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) {
ObjCPropertyDecl *PD = PID->getPropertyDecl();
@@ -2871,13 +2914,12 @@
/// EmitNamespace - Emit all declarations in a namespace.
void CodeGenModule::EmitNamespace(const NamespaceDecl *ND) {
- for (RecordDecl::decl_iterator I = ND->decls_begin(), E = ND->decls_end();
- I != E; ++I) {
- if (const VarDecl *VD = dyn_cast<VarDecl>(*I))
+ for (auto *I : ND->decls()) {
+ if (const auto *VD = dyn_cast<VarDecl>(I))
if (VD->getTemplateSpecializationKind() != TSK_ExplicitSpecialization &&
VD->getTemplateSpecializationKind() != TSK_Undeclared)
continue;
- EmitTopLevelDecl(*I);
+ EmitTopLevelDecl(I);
}
}
@@ -2889,17 +2931,14 @@
return;
}
- for (RecordDecl::decl_iterator I = LSD->decls_begin(), E = LSD->decls_end();
- I != E; ++I) {
+ for (auto *I : LSD->decls()) {
// Meta-data for ObjC class includes references to implemented methods.
// Generate class's method definitions first.
- if (ObjCImplDecl *OID = dyn_cast<ObjCImplDecl>(*I)) {
- for (ObjCContainerDecl::method_iterator M = OID->meth_begin(),
- MEnd = OID->meth_end();
- M != MEnd; ++M)
- EmitTopLevelDecl(*M);
+ if (auto *OID = dyn_cast<ObjCImplDecl>(I)) {
+ for (auto *M : OID->methods())
+ EmitTopLevelDecl(M);
}
- EmitTopLevelDecl(*I);
+ EmitTopLevelDecl(I);
}
}
@@ -2940,7 +2979,6 @@
break;
// No code generation needed.
case Decl::UsingShadow:
- case Decl::Using:
case Decl::ClassTemplate:
case Decl::VarTemplate:
case Decl::VarTemplatePartialSpecialization:
@@ -2949,6 +2987,10 @@
case Decl::Block:
case Decl::Empty:
break;
+ case Decl::Using: // using X; [C++]
+ if (CGDebugInfo *DI = getModuleDebugInfo())
+ DI->EmitUsingDecl(cast<UsingDecl>(*D));
+ return;
case Decl::NamespaceAlias:
if (CGDebugInfo *DI = getModuleDebugInfo())
DI->EmitNamespaceAlias(cast<NamespaceAliasDecl>(*D));
@@ -3048,7 +3090,15 @@
ImportedModules.insert(Import->getImportedModule());
break;
- }
+ }
+
+ case Decl::ClassTemplateSpecialization: {
+ const ClassTemplateSpecializationDecl *Spec =
+ cast<ClassTemplateSpecializationDecl>(D);
+ if (DebugInfo &&
+ Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition)
+ DebugInfo->completeTemplateDefinition(*Spec);
+ }
default:
// Make sure we handled everything we should, every other kind is a
@@ -3094,7 +3144,7 @@
IdentifierInfo *Name = I->first;
llvm::GlobalValue *Val = I->second;
if (Val && !getModule().getNamedValue(Name->getName()))
- AddUsedGlobal(new llvm::GlobalAlias(Val->getType(), Val->getLinkage(),
+ addUsedGlobal(new llvm::GlobalAlias(Val->getType(), Val->getLinkage(),
Name->getName(), Val, &getModule()));
}
}
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index c161224..0d13bdc 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -30,7 +30,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/ValueHandle.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/SpecialCaseList.h"
namespace llvm {
@@ -85,7 +85,8 @@
class CGCUDARuntime;
class BlockFieldFlags;
class FunctionArgList;
-
+ class PGOProfileData;
+
struct OrderGlobalInits {
unsigned int priority;
unsigned int lex_order;
@@ -98,10 +99,8 @@
}
bool operator<(const OrderGlobalInits &RHS) const {
- if (priority < RHS.priority)
- return true;
-
- return priority == RHS.priority && lex_order < RHS.lex_order;
+ return std::tie(priority, lex_order) <
+ std::tie(RHS.priority, RHS.lex_order);
}
};
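
The comparator above now leans on std::tie for lexicographic ordering; a minimal standalone sketch of the same idiom (the Order type here is a hypothetical stand-in, not the real class):

#include <tuple>

// priority is compared first; lex_order only breaks ties -- the same
// behaviour as the hand-written comparison this replaces.
struct Order {
  unsigned priority, lex_order;
  bool operator<(const Order &RHS) const {
    return std::tie(priority, lex_order) <
           std::tie(RHS.priority, RHS.lex_order);
  }
};
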
@@ -236,7 +235,7 @@
DiagnosticsEngine &Diags;
const llvm::DataLayout &TheDataLayout;
const TargetInfo &Target;
- CGCXXABI &ABI;
+ std::unique_ptr<CGCXXABI> ABI;
llvm::LLVMContext &VMContext;
CodeGenTBAA *TBAA;
@@ -258,6 +257,7 @@
ARCEntrypoints *ARCData;
llvm::MDNode *NoObjCARCExceptionsMetadata;
RREntrypoints *RRData;
+ PGOProfileData *PGOData;
// WeakRefReferences - A set of references that have only been seen via
// a weakref so far. This is used to remove the weak of the reference if we
@@ -273,7 +273,15 @@
/// DeferredDeclsToEmit - This is a list of deferred decls which we have seen
/// that *are* actually referenced. These get code generated when the module
/// is done.
- std::vector<GlobalDecl> DeferredDeclsToEmit;
+ struct DeferredGlobal {
+ DeferredGlobal(llvm::GlobalValue *GV, GlobalDecl GD) : GV(GV), GD(GD) {}
+ llvm::AssertingVH<llvm::GlobalValue> GV;
+ GlobalDecl GD;
+ };
+ std::vector<DeferredGlobal> DeferredDeclsToEmit;
+ void addDeferredDeclToEmit(llvm::GlobalValue *GV, GlobalDecl GD) {
+ DeferredDeclsToEmit.push_back(DeferredGlobal(GV, GD));
+ }
/// List of alias we have emitted. Used to make sure that what they point to
/// is defined once we get to the end of the of the translation unit.
@@ -290,6 +298,7 @@
/// forcing visibility of symbols which may otherwise be optimized
/// out.
std::vector<llvm::WeakVH> LLVMUsed;
+ std::vector<llvm::WeakVH> LLVMCompilerUsed;
/// GlobalCtors - Store the list of global constructors and their respective
/// priorities to be emitted when the translation unit is complete.
@@ -310,7 +319,10 @@
llvm::StringMap<llvm::Constant*> AnnotationStrings;
llvm::StringMap<llvm::Constant*> CFConstantStringMap;
- llvm::StringMap<llvm::GlobalVariable*> ConstantStringMap;
+
+ llvm::StringMap<llvm::GlobalVariable *> Constant1ByteStringMap;
+ llvm::StringMap<llvm::GlobalVariable *> Constant2ByteStringMap;
+ llvm::StringMap<llvm::GlobalVariable *> Constant4ByteStringMap;
llvm::DenseMap<const Decl*, llvm::Constant *> StaticLocalDeclMap;
llvm::DenseMap<const Decl*, llvm::GlobalVariable*> StaticLocalDeclGuardMap;
llvm::DenseMap<const Expr*, llvm::Constant *> MaterializedGlobalTemporaryMap;
@@ -421,7 +433,7 @@
GlobalDecl initializedGlobalDecl;
- llvm::OwningPtr<llvm::SpecialCaseList> SanitizerBlacklist;
+ std::unique_ptr<llvm::SpecialCaseList> SanitizerBlacklist;
const SanitizerOptions &SanOpts;
@@ -433,6 +445,8 @@
~CodeGenModule();
+ void clear();
+
/// Release - Finalize LLVM code generation.
void Release();
@@ -469,6 +483,10 @@
return *RRData;
}
+ PGOProfileData *getPGOData() const {
+ return PGOData;
+ }
+
llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) {
return StaticLocalDeclMap[D];
}
@@ -525,7 +543,7 @@
DiagnosticsEngine &getDiags() const { return Diags; }
const llvm::DataLayout &getDataLayout() const { return TheDataLayout; }
const TargetInfo &getTarget() const { return Target; }
- CGCXXABI &getCXXABI() { return ABI; }
+ CGCXXABI &getCXXABI() const { return *ABI; }
llvm::LLVMContext &getLLVMContext() { return VMContext; }
bool shouldUseTBAA() const { return TBAA != 0; }
@@ -577,21 +595,6 @@
/// for the thread-local variable declaration D.
void setTLSMode(llvm::GlobalVariable *GV, const VarDecl &D) const;
- /// TypeVisibilityKind - The kind of global variable that is passed to
- /// setTypeVisibility
- enum TypeVisibilityKind {
- TVK_ForVTT,
- TVK_ForVTable,
- TVK_ForConstructionVTable,
- TVK_ForRTTI,
- TVK_ForRTTIName
- };
-
- /// setTypeVisibility - Set the visibility for the given global
- /// value which holds information about a type.
- void setTypeVisibility(llvm::GlobalValue *GV, const CXXRecordDecl *D,
- TypeVisibilityKind TVK) const;
-
static llvm::GlobalValue::VisibilityTypes GetLLVMVisibility(Visibility V) {
switch (V) {
case DefaultVisibility: return llvm::GlobalValue::DefaultVisibility;
@@ -639,9 +642,9 @@
/// GetAddrOfFunction - Return the address of the given function. If Ty is
/// non-null, then this function will use the specified type if it has to
/// create it.
- llvm::Constant *GetAddrOfFunction(GlobalDecl GD,
- llvm::Type *Ty = 0,
- bool ForVTable = false);
+ llvm::Constant *GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty = 0,
+ bool ForVTable = false,
+ bool DontDefer = false);
/// GetAddrOfRTTIDescriptor - Get the address of the RTTI descriptor
/// for the given type.
@@ -769,14 +772,16 @@
/// given type.
llvm::GlobalValue *GetAddrOfCXXConstructor(const CXXConstructorDecl *ctor,
CXXCtorType ctorType,
- const CGFunctionInfo *fnInfo = 0);
+ const CGFunctionInfo *fnInfo = 0,
+ bool DontDefer = false);
/// GetAddrOfCXXDestructor - Return the address of the constructor of the
/// given type.
llvm::GlobalValue *GetAddrOfCXXDestructor(const CXXDestructorDecl *dtor,
CXXDtorType dtorType,
const CGFunctionInfo *fnInfo = 0,
- llvm::FunctionType *fnType = 0);
+ llvm::FunctionType *fnType = 0,
+ bool DontDefer = false);
/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
@@ -798,10 +803,11 @@
template<typename SomeDecl>
void MaybeHandleStaticInExternC(const SomeDecl *D, llvm::GlobalValue *GV);
- /// AddUsedGlobal - Add a global which should be forced to be
- /// present in the object file; these are emitted to the llvm.used
- /// metadata global.
- void AddUsedGlobal(llvm::GlobalValue *GV);
+ /// Add a global to a list to be added to the llvm.used metadata.
+ void addUsedGlobal(llvm::GlobalValue *GV);
+
+ /// Add a global to a list to be added to the llvm.compiler.used metadata.
+ void addCompilerUsedGlobal(llvm::GlobalValue *GV);
/// AddCXXDtorEntry - Add a destructor and object to add to the C++ global
/// destructor function.
@@ -902,6 +908,10 @@
/// as a return type.
bool ReturnTypeUsesSRet(const CGFunctionInfo &FI);
+ /// ReturnSlotInterferesWithArgs - Return true iff the given type uses an
+ /// argument slot when 'sret' is used as a return type.
+ bool ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI);
+
/// ReturnTypeUsesFPRet - Return true iff the given type uses 'fpret' when
/// used as a return type.
bool ReturnTypeUsesFPRet(QualType ResultType);
@@ -1009,12 +1019,11 @@
private:
llvm::GlobalValue *GetGlobalValue(StringRef Ref);
- llvm::Constant *GetOrCreateLLVMFunction(StringRef MangledName,
- llvm::Type *Ty,
- GlobalDecl D,
- bool ForVTable,
- llvm::AttributeSet ExtraAttrs =
- llvm::AttributeSet());
+ llvm::Constant *
+ GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D,
+ bool ForVTable, bool DontDefer = false,
+ llvm::AttributeSet ExtraAttrs = llvm::AttributeSet());
+
llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
llvm::PointerType *PTy,
const VarDecl *D,
@@ -1037,9 +1046,9 @@
llvm::Function *F,
bool IsIncompleteFunction);
- void EmitGlobalDefinition(GlobalDecl D);
+ void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = 0);
- void EmitGlobalFunctionDefinition(GlobalDecl GD);
+ void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV);
void EmitGlobalVarDefinition(const VarDecl *D);
void EmitAliasDefinition(GlobalDecl GD);
void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D);
@@ -1104,9 +1113,8 @@
/// still have a use for.
void EmitDeferredVTables();
- /// EmitLLVMUsed - Emit the llvm.used metadata used to force
- /// references to global which may otherwise be optimized out.
- void EmitLLVMUsed();
+ /// Emit the llvm.used and llvm.compiler.used metadata.
+ void emitLLVMUsed();
/// \brief Emit the link options introduced by imported modules.
void EmitModuleLinkOptions();
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
new file mode 100644
index 0000000..ac6b7bc
--- /dev/null
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -0,0 +1,1033 @@
+//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Instrumentation-based profile-guided optimization
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenPGO.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/StmtVisitor.h"
+#include "llvm/Config/config.h" // for strtoull()/strtoll() define
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/FileSystem.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) {
+ DiagnosticsEngine &Diags = CGM.getDiags();
+ unsigned diagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0");
+ Diags.Report(diagID) << Message;
+}
+
+PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path)
+ : CGM(CGM) {
+ if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) {
+ ReportBadPGOData(CGM, "failed to open pgo data file");
+ return;
+ }
+
+ if (DataBuffer->getBufferSize() > std::numeric_limits<unsigned>::max()) {
+ ReportBadPGOData(CGM, "pgo data file too big");
+ return;
+ }
+
+ // Scan through the data file and map each function to the corresponding
+ // file offset where its counts are stored.
+ const char *BufferStart = DataBuffer->getBufferStart();
+ const char *BufferEnd = DataBuffer->getBufferEnd();
+ const char *CurPtr = BufferStart;
+ uint64_t MaxCount = 0;
+ while (CurPtr < BufferEnd) {
+ // Read the function name.
+ const char *FuncStart = CurPtr;
+    // For Objective-C methods, the name may include whitespace, so take
+    // everything up to the end of the line as the function name. (This is a
+    // temporary hack since we are going to completely replace this file
+    // format in the near future.)
+ CurPtr = strchr(CurPtr, '\n');
+ if (!CurPtr) {
+ ReportBadPGOData(CGM, "pgo data file has malformed function entry");
+ return;
+ }
+ StringRef FuncName(FuncStart, CurPtr - FuncStart);
+
+ // Skip over the function hash.
+ CurPtr = strchr(++CurPtr, '\n');
+ if (!CurPtr) {
+ ReportBadPGOData(CGM, "pgo data file is missing the function hash");
+ return;
+ }
+
+ // Read the number of counters.
+ char *EndPtr;
+ unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
+ if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) {
+ ReportBadPGOData(CGM, "pgo data file has unexpected number of counters");
+ return;
+ }
+ CurPtr = EndPtr;
+
+ // Read function count.
+ uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
+ if (EndPtr == CurPtr || *EndPtr != '\n') {
+ ReportBadPGOData(CGM, "pgo-data file has bad count value");
+ return;
+ }
+ CurPtr = EndPtr; // Point to '\n'.
+ FunctionCounts[FuncName] = Count;
+ MaxCount = Count > MaxCount ? Count : MaxCount;
+
+ // There is one line for each counter; skip over those lines.
+ // Since function count is already read, we start the loop from 1.
+ for (unsigned N = 1; N < NumCounters; ++N) {
+ CurPtr = strchr(++CurPtr, '\n');
+ if (!CurPtr) {
+ ReportBadPGOData(CGM, "pgo data file is missing some counter info");
+ return;
+ }
+ }
+
+ // Skip over the blank line separating functions.
+ CurPtr += 2;
+
+ DataOffsets[FuncName] = FuncStart - BufferStart;
+ }
+ MaxFunctionCount = MaxCount;
+}
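
Read back from the parser above, one record of the (explicitly temporary) text profile format looks roughly like the following; the function name and counts are illustrative only.

// One function record, as the constructor above expects it: a name line,
// a hash line, a counter-count line, then one value per counter (the first
// being the function entry count), followed by a blank separator line.
const char *ExampleRecord =
    "main\n"   // function name (may contain spaces for ObjC methods)
    "0\n"      // function hash
    "2\n"      // number of counters
    "100\n"    // counter 0: function entry count
    "42\n"     // counter 1
    "\n";      // blank line before the next record
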
+
+bool PGOProfileData::getFunctionCounts(StringRef FuncName, uint64_t &FuncHash,
+ std::vector<uint64_t> &Counts) {
+ // Find the relevant section of the pgo-data file.
+ llvm::StringMap<unsigned>::const_iterator OffsetIter =
+ DataOffsets.find(FuncName);
+ if (OffsetIter == DataOffsets.end())
+ return true;
+ const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue();
+
+ // Skip over the function name.
+ CurPtr = strchr(CurPtr, '\n');
+ assert(CurPtr && "pgo-data has corrupted function entry");
+
+ char *EndPtr;
+ // Read the function hash.
+ FuncHash = strtoll(++CurPtr, &EndPtr, 10);
+ assert(EndPtr != CurPtr && *EndPtr == '\n' &&
+ "pgo-data file has corrupted function hash");
+ CurPtr = EndPtr;
+
+ // Read the number of counters.
+ unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
+ assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 &&
+ "pgo-data file has corrupted number of counters");
+ CurPtr = EndPtr;
+
+ Counts.reserve(NumCounters);
+
+ for (unsigned N = 0; N < NumCounters; ++N) {
+ // Read the count value.
+ uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
+ if (EndPtr == CurPtr || *EndPtr != '\n') {
+ ReportBadPGOData(CGM, "pgo-data file has bad count value");
+ return true;
+ }
+ Counts.push_back(Count);
+ CurPtr = EndPtr + 1;
+ }
+
+ // Make sure the number of counters matches up.
+ if (Counts.size() != NumCounters) {
+ ReportBadPGOData(CGM, "pgo-data file has inconsistent counters");
+ return true;
+ }
+
+ return false;
+}
+
+void CodeGenPGO::setFuncName(llvm::Function *Fn) {
+ RawFuncName = Fn->getName();
+
+ // Function names may be prefixed with a binary '1' to indicate
+ // that the backend should not modify the symbols due to any platform
+ // naming convention. Do not include that '1' in the PGO profile name.
+ if (RawFuncName[0] == '\1')
+ RawFuncName = RawFuncName.substr(1);
+
+ if (!Fn->hasLocalLinkage()) {
+ PrefixedFuncName.reset(new std::string(RawFuncName));
+ return;
+ }
+
+ // For local symbols, prepend the main file name to distinguish them.
+ // Do not include the full path in the file name since there's no guarantee
+ // that it will stay the same, e.g., if the files are checked out from
+ // version control in different locations.
+ PrefixedFuncName.reset(new std::string(CGM.getCodeGenOpts().MainFileName));
+ if (PrefixedFuncName->empty())
+ PrefixedFuncName->assign("<unknown>");
+ PrefixedFuncName->append(":");
+ PrefixedFuncName->append(RawFuncName);
+}
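
A compact sketch of the naming scheme implemented above (a standalone helper for illustration, not the CodeGenPGO API): local symbols get the main file name prepended so identically named statics from different translation units stay distinct in the profile.

#include <string>

std::string profileName(std::string Raw, bool IsLocalLinkage,
                        std::string MainFile) {
  if (!Raw.empty() && Raw[0] == '\1')  // strip the "don't mangle" marker
    Raw.erase(0, 1);
  if (!IsLocalLinkage)
    return Raw;                        // e.g. "foo" stays "foo"
  if (MainFile.empty())
    MainFile = "<unknown>";
  return MainFile + ":" + Raw;         // e.g. "foo.c:bar"
}
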
+
+static llvm::Function *getRegisterFunc(CodeGenModule &CGM) {
+ return CGM.getModule().getFunction("__llvm_profile_register_functions");
+}
+
+static llvm::BasicBlock *getOrInsertRegisterBB(CodeGenModule &CGM) {
+ // Don't do this for Darwin. compiler-rt uses linker magic.
+ if (CGM.getTarget().getTriple().isOSDarwin())
+ return nullptr;
+
+ // Only need to insert this once per module.
+ if (llvm::Function *RegisterF = getRegisterFunc(CGM))
+ return &RegisterF->getEntryBlock();
+
+ // Construct the function.
+ auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
+ auto *RegisterFTy = llvm::FunctionType::get(VoidTy, false);
+ auto *RegisterF = llvm::Function::Create(RegisterFTy,
+ llvm::GlobalValue::InternalLinkage,
+ "__llvm_profile_register_functions",
+ &CGM.getModule());
+ RegisterF->setUnnamedAddr(true);
+ if (CGM.getCodeGenOpts().DisableRedZone)
+ RegisterF->addFnAttr(llvm::Attribute::NoRedZone);
+
+ // Construct and return the entry block.
+ auto *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", RegisterF);
+ CGBuilderTy Builder(BB);
+ Builder.CreateRetVoid();
+ return BB;
+}
+
+static llvm::Constant *getOrInsertRuntimeRegister(CodeGenModule &CGM) {
+ auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
+ auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
+ auto *RuntimeRegisterTy = llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
+ return CGM.getModule().getOrInsertFunction("__llvm_profile_register_function",
+ RuntimeRegisterTy);
+}
+
+static bool isMachO(const CodeGenModule &CGM) {
+ return CGM.getTarget().getTriple().isOSBinFormatMachO();
+}
+
+static StringRef getCountersSection(const CodeGenModule &CGM) {
+ return isMachO(CGM) ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts";
+}
+
+static StringRef getNameSection(const CodeGenModule &CGM) {
+ return isMachO(CGM) ? "__DATA,__llvm_prf_names" : "__llvm_prf_names";
+}
+
+static StringRef getDataSection(const CodeGenModule &CGM) {
+ return isMachO(CGM) ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";
+}
+
+llvm::GlobalVariable *CodeGenPGO::buildDataVar() {
+ // Create name variable.
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+ auto *VarName = llvm::ConstantDataArray::getString(Ctx, getFuncName(),
+ false);
+ auto *Name = new llvm::GlobalVariable(CGM.getModule(), VarName->getType(),
+ true, VarLinkage, VarName,
+ getFuncVarName("name"));
+ Name->setSection(getNameSection(CGM));
+ Name->setAlignment(1);
+
+ // Create data variable.
+ auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
+ auto *Int64Ty = llvm::Type::getInt64Ty(Ctx);
+ auto *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
+ auto *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
+ llvm::Type *DataTypes[] = {
+ Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy
+ };
+ auto *DataTy = llvm::StructType::get(Ctx, makeArrayRef(DataTypes));
+ llvm::Constant *DataVals[] = {
+ llvm::ConstantInt::get(Int32Ty, getFuncName().size()),
+ llvm::ConstantInt::get(Int32Ty, NumRegionCounters),
+ llvm::ConstantInt::get(Int64Ty, FunctionHash),
+ llvm::ConstantExpr::getBitCast(Name, Int8PtrTy),
+ llvm::ConstantExpr::getBitCast(RegionCounters, Int64PtrTy)
+ };
+ auto *Data =
+ new llvm::GlobalVariable(CGM.getModule(), DataTy, true, VarLinkage,
+ llvm::ConstantStruct::get(DataTy, DataVals),
+ getFuncVarName("data"));
+
+ // All the data should be packed into an array in its own section.
+ Data->setSection(getDataSection(CGM));
+ Data->setAlignment(8);
+
+ // Make sure the data doesn't get deleted.
+ CGM.addUsedGlobal(Data);
+ return Data;
+}
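
For orientation, the per-function record assembled above corresponds roughly to this C++ view of the { i32, i32, i64, i8*, i64* } struct; the field names are illustrative and not taken from compiler-rt.

#include <cstdint>

struct ProfileData {            // emitted into the __llvm_prf_data section
  uint32_t NameSize;            // length of the profile name
  uint32_t NumCounters;         // number of region counters
  uint64_t FunctionHash;        // used to detect stale profile data
  const char *Name;             // points into __llvm_prf_names
  uint64_t *Counters;           // points into __llvm_prf_cnts
};
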
+
+void CodeGenPGO::emitInstrumentationData() {
+ if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
+ return;
+
+ // Build the data.
+ auto *Data = buildDataVar();
+
+ // Register the data.
+ auto *RegisterBB = getOrInsertRegisterBB(CGM);
+ if (!RegisterBB)
+ return;
+ CGBuilderTy Builder(RegisterBB->getTerminator());
+ auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
+ Builder.CreateCall(getOrInsertRuntimeRegister(CGM),
+ Builder.CreateBitCast(Data, VoidPtrTy));
+}
+
+llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
+ if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
+ return nullptr;
+
+ // Only need to create this once per module.
+ if (CGM.getModule().getFunction("__llvm_profile_init"))
+ return nullptr;
+
+ // Get the function to call at initialization.
+ llvm::Constant *RegisterF = getRegisterFunc(CGM);
+ if (!RegisterF)
+ return nullptr;
+
+ // Create the initialization function.
+ auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
+ auto *F = llvm::Function::Create(llvm::FunctionType::get(VoidTy, false),
+ llvm::GlobalValue::InternalLinkage,
+ "__llvm_profile_init", &CGM.getModule());
+ F->setUnnamedAddr(true);
+ F->addFnAttr(llvm::Attribute::NoInline);
+ if (CGM.getCodeGenOpts().DisableRedZone)
+ F->addFnAttr(llvm::Attribute::NoRedZone);
+
+ // Add the basic block and the necessary calls.
+ CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F));
+ Builder.CreateCall(RegisterF);
+ Builder.CreateRetVoid();
+
+ return F;
+}
+
+namespace {
+ /// A StmtVisitor that fills a map of statements to PGO counters.
+ struct MapRegionCounters : public ConstStmtVisitor<MapRegionCounters> {
+ /// The next counter value to assign.
+ unsigned NextCounter;
+ /// The map of statements to counters.
+ llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
+
+ MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
+ : NextCounter(0), CounterMap(CounterMap) {}
+
+ void VisitChildren(const Stmt *S) {
+ for (Stmt::const_child_range I = S->children(); I; ++I)
+ if (*I)
+ this->Visit(*I);
+ }
+ void VisitStmt(const Stmt *S) { VisitChildren(S); }
+
+ /// Assign a counter to track entry to the function body.
+ void VisitFunctionDecl(const FunctionDecl *S) {
+ CounterMap[S->getBody()] = NextCounter++;
+ Visit(S->getBody());
+ }
+ void VisitObjCMethodDecl(const ObjCMethodDecl *S) {
+ CounterMap[S->getBody()] = NextCounter++;
+ Visit(S->getBody());
+ }
+ void VisitBlockDecl(const BlockDecl *S) {
+ CounterMap[S->getBody()] = NextCounter++;
+ Visit(S->getBody());
+ }
+ /// Assign a counter to track the block following a label.
+ void VisitLabelStmt(const LabelStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getSubStmt());
+ }
+ /// Assign a counter for the body of a while loop.
+ void VisitWhileStmt(const WhileStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getCond());
+ Visit(S->getBody());
+ }
+ /// Assign a counter for the body of a do-while loop.
+ void VisitDoStmt(const DoStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getBody());
+ Visit(S->getCond());
+ }
+ /// Assign a counter for the body of a for loop.
+ void VisitForStmt(const ForStmt *S) {
+ CounterMap[S] = NextCounter++;
+ if (S->getInit())
+ Visit(S->getInit());
+ const Expr *E;
+ if ((E = S->getCond()))
+ Visit(E);
+ if ((E = S->getInc()))
+ Visit(E);
+ Visit(S->getBody());
+ }
+ /// Assign a counter for the body of a for-range loop.
+ void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getRangeStmt());
+ Visit(S->getBeginEndStmt());
+ Visit(S->getCond());
+ Visit(S->getLoopVarStmt());
+ Visit(S->getBody());
+ Visit(S->getInc());
+ }
+ /// Assign a counter for the body of a for-collection loop.
+ void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getElement());
+ Visit(S->getBody());
+ }
+ /// Assign a counter for the exit block of the switch statement.
+ void VisitSwitchStmt(const SwitchStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getCond());
+ Visit(S->getBody());
+ }
+ /// Assign a counter for a particular case in a switch. This counts jumps
+ /// from the switch header as well as fallthrough from the case before this
+ /// one.
+ void VisitCaseStmt(const CaseStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getSubStmt());
+ }
+ /// Assign a counter for the default case of a switch statement. The count
+ /// is the number of branches from the loop header to the default, and does
+ /// not include fallthrough from previous cases. If we have multiple
+ /// conditional branch blocks from the switch instruction to the default
+ /// block, as with large GNU case ranges, this is the counter for the last
+ /// edge in that series, rather than the first.
+ void VisitDefaultStmt(const DefaultStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getSubStmt());
+ }
+ /// Assign a counter for the "then" part of an if statement. The count for
+ /// the "else" part, if it exists, will be calculated from this counter.
+ void VisitIfStmt(const IfStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getCond());
+ Visit(S->getThen());
+ if (S->getElse())
+ Visit(S->getElse());
+ }
+ /// Assign a counter for the continuation block of a C++ try statement.
+ void VisitCXXTryStmt(const CXXTryStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getTryBlock());
+ for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
+ Visit(S->getHandler(I));
+ }
+ /// Assign a counter for a catch statement's handler block.
+ void VisitCXXCatchStmt(const CXXCatchStmt *S) {
+ CounterMap[S] = NextCounter++;
+ Visit(S->getHandlerBlock());
+ }
+ /// Assign a counter for the "true" part of a conditional operator. The
+ /// count in the "false" part will be calculated from this counter.
+ void VisitConditionalOperator(const ConditionalOperator *E) {
+ CounterMap[E] = NextCounter++;
+ Visit(E->getCond());
+ Visit(E->getTrueExpr());
+ Visit(E->getFalseExpr());
+ }
+ /// Assign a counter for the right hand side of a logical and operator.
+ void VisitBinLAnd(const BinaryOperator *E) {
+ CounterMap[E] = NextCounter++;
+ Visit(E->getLHS());
+ Visit(E->getRHS());
+ }
+ /// Assign a counter for the right hand side of a logical or operator.
+ void VisitBinLOr(const BinaryOperator *E) {
+ CounterMap[E] = NextCounter++;
+ Visit(E->getLHS());
+ Visit(E->getRHS());
+ }
+ };
+
+ /// A StmtVisitor that propagates the raw counts through the AST and
+ /// records the count at statements where the value may change.
+ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
+ /// PGO state.
+ CodeGenPGO &PGO;
+
+ /// A flag that is set when the current count should be recorded on the
+ /// next statement, such as at the exit of a loop.
+ bool RecordNextStmtCount;
+
+ /// The map of statements to count values.
+ llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
+
+ /// BreakContinueStack - Keep counts of breaks and continues inside loops.
+ struct BreakContinue {
+ uint64_t BreakCount;
+ uint64_t ContinueCount;
+ BreakContinue() : BreakCount(0), ContinueCount(0) {}
+ };
+ SmallVector<BreakContinue, 8> BreakContinueStack;
+
+ ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
+ CodeGenPGO &PGO)
+ : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
+
+ void RecordStmtCount(const Stmt *S) {
+ if (RecordNextStmtCount) {
+ CountMap[S] = PGO.getCurrentRegionCount();
+ RecordNextStmtCount = false;
+ }
+ }
+
+ void VisitStmt(const Stmt *S) {
+ RecordStmtCount(S);
+ for (Stmt::const_child_range I = S->children(); I; ++I) {
+ if (*I)
+ this->Visit(*I);
+ }
+ }
+
+ void VisitFunctionDecl(const FunctionDecl *S) {
+ RegionCounter Cnt(PGO, S->getBody());
+ Cnt.beginRegion();
+ CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+ Visit(S->getBody());
+ }
+
+ void VisitObjCMethodDecl(const ObjCMethodDecl *S) {
+ RegionCounter Cnt(PGO, S->getBody());
+ Cnt.beginRegion();
+ CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+ Visit(S->getBody());
+ }
+
+ void VisitBlockDecl(const BlockDecl *S) {
+ RegionCounter Cnt(PGO, S->getBody());
+ Cnt.beginRegion();
+ CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+ Visit(S->getBody());
+ }
+
+ void VisitReturnStmt(const ReturnStmt *S) {
+ RecordStmtCount(S);
+ if (S->getRetValue())
+ Visit(S->getRetValue());
+ PGO.setCurrentRegionUnreachable();
+ RecordNextStmtCount = true;
+ }
+
+ void VisitGotoStmt(const GotoStmt *S) {
+ RecordStmtCount(S);
+ PGO.setCurrentRegionUnreachable();
+ RecordNextStmtCount = true;
+ }
+
+ void VisitLabelStmt(const LabelStmt *S) {
+ RecordNextStmtCount = false;
+ RegionCounter Cnt(PGO, S);
+ Cnt.beginRegion();
+ CountMap[S] = PGO.getCurrentRegionCount();
+ Visit(S->getSubStmt());
+ }
+
+ void VisitBreakStmt(const BreakStmt *S) {
+ RecordStmtCount(S);
+ assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
+ BreakContinueStack.back().BreakCount += PGO.getCurrentRegionCount();
+ PGO.setCurrentRegionUnreachable();
+ RecordNextStmtCount = true;
+ }
+
+ void VisitContinueStmt(const ContinueStmt *S) {
+ RecordStmtCount(S);
+ assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
+ BreakContinueStack.back().ContinueCount += PGO.getCurrentRegionCount();
+ PGO.setCurrentRegionUnreachable();
+ RecordNextStmtCount = true;
+ }
+
+ void VisitWhileStmt(const WhileStmt *S) {
+ RecordStmtCount(S);
+ RegionCounter Cnt(PGO, S);
+ BreakContinueStack.push_back(BreakContinue());
+ // Visit the body region first so the break/continue adjustments can be
+ // included when visiting the condition.
+ Cnt.beginRegion();
+ CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+ Visit(S->getBody());
+ Cnt.adjustForControlFlow();
+
+ // ...then go back and propagate counts through the condition. The count
+ // at the start of the condition is the sum of the incoming edges,
+ // the backedge from the end of the loop body, and the edges from
+ // continue statements.
+ BreakContinue BC = BreakContinueStack.pop_back_val();
+ Cnt.setCurrentRegionCount(Cnt.getParentCount() +
+ Cnt.getAdjustedCount() + BC.ContinueCount);
+ CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+ Visit(S->getCond());
+ Cnt.adjustForControlFlow();
+ Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+ RecordNextStmtCount = true;
+ }
+
+ void VisitDoStmt(const DoStmt *S) {
+ RecordStmtCount(S);
+ RegionCounter Cnt(PGO, S);
+ BreakContinueStack.push_back(BreakContinue());
+ Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
+ CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+ Visit(S->getBody());
+ Cnt.adjustForControlFlow();
+
+ BreakContinue BC = BreakContinueStack.pop_back_val();
+ // The count at the start of the condition is equal to the count at the
+ // end of the body. The adjusted count does not include either the
+ // fall-through count coming into the loop or the continue count, so add
+ // both of those separately. This is coincidentally the same equation as
+ // with while loops but for different reasons.
+ Cnt.setCurrentRegionCount(Cnt.getParentCount() +
+ Cnt.getAdjustedCount() + BC.ContinueCount);
+ CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+ Visit(S->getCond());
+ Cnt.adjustForControlFlow();
+ Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+ RecordNextStmtCount = true;
+ }
+
+ void VisitForStmt(const ForStmt *S) {
+ RecordStmtCount(S);
+ if (S->getInit())
+ Visit(S->getInit());
+ RegionCounter Cnt(PGO, S);
+ BreakContinueStack.push_back(BreakContinue());
+ // Visit the body region first. (This is basically the same as a while
+ // loop; see further comments in VisitWhileStmt.)
+ Cnt.beginRegion();
+ CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+ Visit(S->getBody());
+ Cnt.adjustForControlFlow();
+
+ // The increment is essentially part of the body but it needs to include
+ // the count for all the continue statements.
+ if (S->getInc()) {
+ Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
+ BreakContinueStack.back().ContinueCount);
+ CountMap[S->getInc()] = PGO.getCurrentRegionCount();
+ Visit(S->getInc());
+ Cnt.adjustForControlFlow();
+ }
+
+ BreakContinue BC = BreakContinueStack.pop_back_val();
+
+ // ...then go back and propagate counts through the condition.
+ if (S->getCond()) {
+ Cnt.setCurrentRegionCount(Cnt.getParentCount() +
+ Cnt.getAdjustedCount() +
+ BC.ContinueCount);
+ CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+ Visit(S->getCond());
+ Cnt.adjustForControlFlow();
+ }
+ Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+ RecordNextStmtCount = true;
+ }
+
+ void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
+ RecordStmtCount(S);
+ Visit(S->getRangeStmt());
+ Visit(S->getBeginEndStmt());
+ RegionCounter Cnt(PGO, S);
+ BreakContinueStack.push_back(BreakContinue());
+ // Visit the body region first. (This is basically the same as a while
+ // loop; see further comments in VisitWhileStmt.)
+ Cnt.beginRegion();
+ CountMap[S->getLoopVarStmt()] = PGO.getCurrentRegionCount();
+ Visit(S->getLoopVarStmt());
+ Visit(S->getBody());
+ Cnt.adjustForControlFlow();
+
+ // The increment is essentially part of the body but it needs to include
+ // the count for all the continue statements.
+ Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
+ BreakContinueStack.back().ContinueCount);
+ CountMap[S->getInc()] = PGO.getCurrentRegionCount();
+ Visit(S->getInc());
+ Cnt.adjustForControlFlow();
+
+ BreakContinue BC = BreakContinueStack.pop_back_val();
+
+ // ...then go back and propagate counts through the condition.
+ Cnt.setCurrentRegionCount(Cnt.getParentCount() +
+ Cnt.getAdjustedCount() +
+ BC.ContinueCount);
+ CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+ Visit(S->getCond());
+ Cnt.adjustForControlFlow();
+ Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+ RecordNextStmtCount = true;
+ }
+
+ void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
+ RecordStmtCount(S);
+ Visit(S->getElement());
+ RegionCounter Cnt(PGO, S);
+ BreakContinueStack.push_back(BreakContinue());
+ Cnt.beginRegion();
+ CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+ Visit(S->getBody());
+ BreakContinue BC = BreakContinueStack.pop_back_val();
+ Cnt.adjustForControlFlow();
+ Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+ RecordNextStmtCount = true;
+ }
+
+ void VisitSwitchStmt(const SwitchStmt *S) {
+ RecordStmtCount(S);
+ Visit(S->getCond());
+ PGO.setCurrentRegionUnreachable();
+ BreakContinueStack.push_back(BreakContinue());
+ Visit(S->getBody());
+ // If the switch is inside a loop, add the continue counts.
+ BreakContinue BC = BreakContinueStack.pop_back_val();
+ if (!BreakContinueStack.empty())
+ BreakContinueStack.back().ContinueCount += BC.ContinueCount;
+ RegionCounter ExitCnt(PGO, S);
+ ExitCnt.beginRegion();
+ RecordNextStmtCount = true;
+ }
+
+ void VisitCaseStmt(const CaseStmt *S) {
+ RecordNextStmtCount = false;
+ RegionCounter Cnt(PGO, S);
+ Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
+ CountMap[S] = Cnt.getCount();
+ RecordNextStmtCount = true;
+ Visit(S->getSubStmt());
+ }
+
+ void VisitDefaultStmt(const DefaultStmt *S) {
+ RecordNextStmtCount = false;
+ RegionCounter Cnt(PGO, S);
+ Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
+ CountMap[S] = Cnt.getCount();
+ RecordNextStmtCount = true;
+ Visit(S->getSubStmt());
+ }
+
+ void VisitIfStmt(const IfStmt *S) {
+ RecordStmtCount(S);
+ RegionCounter Cnt(PGO, S);
+ Visit(S->getCond());
+
+ Cnt.beginRegion();
+ CountMap[S->getThen()] = PGO.getCurrentRegionCount();
+ Visit(S->getThen());
+ Cnt.adjustForControlFlow();
+
+ if (S->getElse()) {
+ Cnt.beginElseRegion();
+ CountMap[S->getElse()] = PGO.getCurrentRegionCount();
+ Visit(S->getElse());
+ Cnt.adjustForControlFlow();
+ }
+ Cnt.applyAdjustmentsToRegion(0);
+ RecordNextStmtCount = true;
+ }
+
+ void VisitCXXTryStmt(const CXXTryStmt *S) {
+ RecordStmtCount(S);
+ Visit(S->getTryBlock());
+ for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
+ Visit(S->getHandler(I));
+ RegionCounter Cnt(PGO, S);
+ Cnt.beginRegion();
+ RecordNextStmtCount = true;
+ }
+
+ void VisitCXXCatchStmt(const CXXCatchStmt *S) {
+ RecordNextStmtCount = false;
+ RegionCounter Cnt(PGO, S);
+ Cnt.beginRegion();
+ CountMap[S] = PGO.getCurrentRegionCount();
+ Visit(S->getHandlerBlock());
+ }
+
+ void VisitConditionalOperator(const ConditionalOperator *E) {
+ RecordStmtCount(E);
+ RegionCounter Cnt(PGO, E);
+ Visit(E->getCond());
+
+ Cnt.beginRegion();
+ CountMap[E->getTrueExpr()] = PGO.getCurrentRegionCount();
+ Visit(E->getTrueExpr());
+ Cnt.adjustForControlFlow();
+
+ Cnt.beginElseRegion();
+ CountMap[E->getFalseExpr()] = PGO.getCurrentRegionCount();
+ Visit(E->getFalseExpr());
+ Cnt.adjustForControlFlow();
+
+ Cnt.applyAdjustmentsToRegion(0);
+ RecordNextStmtCount = true;
+ }
+
+ void VisitBinLAnd(const BinaryOperator *E) {
+ RecordStmtCount(E);
+ RegionCounter Cnt(PGO, E);
+ Visit(E->getLHS());
+ Cnt.beginRegion();
+ CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
+ Visit(E->getRHS());
+ Cnt.adjustForControlFlow();
+ Cnt.applyAdjustmentsToRegion(0);
+ RecordNextStmtCount = true;
+ }
+
+ void VisitBinLOr(const BinaryOperator *E) {
+ RecordStmtCount(E);
+ RegionCounter Cnt(PGO, E);
+ Visit(E->getLHS());
+ Cnt.beginRegion();
+ CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
+ Visit(E->getRHS());
+ Cnt.adjustForControlFlow();
+ Cnt.applyAdjustmentsToRegion(0);
+ RecordNextStmtCount = true;
+ }
+ };
+}
+
+static void emitRuntimeHook(CodeGenModule &CGM) {
+ LLVM_CONSTEXPR const char *RuntimeVarName = "__llvm_profile_runtime";
+ LLVM_CONSTEXPR const char *RuntimeUserName = "__llvm_profile_runtime_user";
+ if (CGM.getModule().getGlobalVariable(RuntimeVarName))
+ return;
+
+ // Declare the runtime hook.
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+ auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
+ auto *Var = new llvm::GlobalVariable(CGM.getModule(), Int32Ty, false,
+ llvm::GlobalValue::ExternalLinkage,
+ nullptr, RuntimeVarName);
+
+ // Make a function that uses it.
+ auto *User = llvm::Function::Create(llvm::FunctionType::get(Int32Ty, false),
+ llvm::GlobalValue::LinkOnceODRLinkage,
+ RuntimeUserName, &CGM.getModule());
+ User->addFnAttr(llvm::Attribute::NoInline);
+ if (CGM.getCodeGenOpts().DisableRedZone)
+ User->addFnAttr(llvm::Attribute::NoRedZone);
+ CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", User));
+ auto *Load = Builder.CreateLoad(Var);
+ Builder.CreateRet(Load);
+
+ // Create a use of the function. Now the definition of the runtime variable
+ // should get pulled in, along with any static initializers.
+ CGM.addUsedGlobal(User);
+}
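As a rough source-level picture of what emitRuntimeHook builds in IR (the two symbol names come from the string constants above; the extern "C" framing and the sketch itself are illustrative, not generated source):

// Illustrative sketch only. The hook amounts to declaring the runtime's
// variable and emitting a small always-kept "user" function that reads it,
// so the linker must pull in the profile runtime that defines the variable.
extern "C" int __llvm_profile_runtime;          // defined by the profile runtime

extern "C" int __llvm_profile_runtime_user() {  // linkonce_odr + noinline in IR,
  return __llvm_profile_runtime;                // and added to llvm.used above
}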
+
+void CodeGenPGO::assignRegionCounters(const Decl *D, llvm::Function *Fn) {
+ bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
+ PGOProfileData *PGOData = CGM.getPGOData();
+ if (!InstrumentRegions && !PGOData)
+ return;
+ if (!D)
+ return;
+ setFuncName(Fn);
+
+ // Set the linkage for variables based on the function linkage. Usually, we
+ // want to match it, but available_externally and extern_weak both have the
+ // wrong semantics.
+ VarLinkage = Fn->getLinkage();
+ switch (VarLinkage) {
+ case llvm::GlobalValue::ExternalWeakLinkage:
+ VarLinkage = llvm::GlobalValue::LinkOnceAnyLinkage;
+ break;
+ case llvm::GlobalValue::AvailableExternallyLinkage:
+ VarLinkage = llvm::GlobalValue::LinkOnceODRLinkage;
+ break;
+ default:
+ break;
+ }
+
+ mapRegionCounters(D);
+ if (InstrumentRegions) {
+ emitRuntimeHook(CGM);
+ emitCounterVariables();
+ }
+ if (PGOData) {
+ loadRegionCounts(PGOData);
+ computeRegionCounts(D);
+ applyFunctionAttributes(PGOData, Fn);
+ }
+}
+
+void CodeGenPGO::mapRegionCounters(const Decl *D) {
+ RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
+ MapRegionCounters Walker(*RegionCounterMap);
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
+ Walker.VisitFunctionDecl(FD);
+ else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
+ Walker.VisitObjCMethodDecl(MD);
+ else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
+ Walker.VisitBlockDecl(BD);
+ NumRegionCounters = Walker.NextCounter;
+ // FIXME: The number of counters isn't sufficient for the hash
+ FunctionHash = NumRegionCounters;
+}
+
+void CodeGenPGO::computeRegionCounts(const Decl *D) {
+ StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
+ ComputeRegionCounts Walker(*StmtCountMap, *this);
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
+ Walker.VisitFunctionDecl(FD);
+ else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
+ Walker.VisitObjCMethodDecl(MD);
+ else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
+ Walker.VisitBlockDecl(BD);
+}
+
+void CodeGenPGO::applyFunctionAttributes(PGOProfileData *PGOData,
+ llvm::Function *Fn) {
+ if (!haveRegionCounts())
+ return;
+
+ uint64_t MaxFunctionCount = PGOData->getMaximumFunctionCount();
+ uint64_t FunctionCount = getRegionCount(0);
+ if (FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount))
+ // Turn on InlineHint attribute for hot functions.
+ // FIXME: 30% is from preliminary tuning on SPEC, it may not be optimal.
+ Fn->addFnAttr(llvm::Attribute::InlineHint);
+ else if (FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount))
+ // Turn on Cold attribute for cold functions.
+ // FIXME: 1% is from preliminary tuning on SPEC, it may not be optimal.
+ Fn->addFnAttr(llvm::Attribute::Cold);
+}
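A quick numeric sketch of these cutoffs, using a hypothetical profile whose hottest function ran 1,000,000 times and a hypothetical entry count of 450,000 (both numbers invented for illustration):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t MaxFunctionCount = 1000000; // hottest function in the profile
  uint64_t FunctionCount = 450000;     // entry count of the function at hand
  // Same comparisons as applyFunctionAttributes() above.
  bool Hot = FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount);
  bool Cold = FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount);
  printf("hot=%d cold=%d\n", Hot, Cold); // prints "hot=1 cold=0"
}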
+
+void CodeGenPGO::emitCounterVariables() {
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+ llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
+ NumRegionCounters);
+ RegionCounters =
+ new llvm::GlobalVariable(CGM.getModule(), CounterTy, false, VarLinkage,
+ llvm::Constant::getNullValue(CounterTy),
+ getFuncVarName("counters"));
+ RegionCounters->setAlignment(8);
+ RegionCounters->setSection(getCountersSection(CGM));
+}
+
+void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
+ if (!RegionCounters)
+ return;
+ llvm::Value *Addr =
+ Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
+ llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Builder.CreateStore(Count, Addr);
+}
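In source-level terms, the increment emitted here behaves roughly like the sketch below; the array name assumes a hypothetical function named "foo" (getFuncVarName("counters") prepends "__llvm_profile_" and appends the raw function name), and the load/add/store sequence is not atomic:

// Illustrative model of emitCounterIncrement() for a hypothetical function foo.
#include <cstdint>

extern uint64_t __llvm_profile_counters_foo[]; // the array created by
                                               // emitCounterVariables()

inline void incrementCounter(unsigned Counter) {
  __llvm_profile_counters_foo[Counter] += 1;   // plain load / add 1 / store
}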
+
+void CodeGenPGO::loadRegionCounts(PGOProfileData *PGOData) {
+ // For now, ignore the counts from the PGO data file only if the number of
+ // counters does not match. This could be tightened down in the future to
+ // ignore counts when the input changes in various ways, e.g., by comparing a
+ // hash value based on some characteristics of the input.
+ RegionCounts.reset(new std::vector<uint64_t>);
+ uint64_t Hash;
+ if (PGOData->getFunctionCounts(getFuncName(), Hash, *RegionCounts) ||
+ Hash != FunctionHash || RegionCounts->size() != NumRegionCounters)
+ RegionCounts.reset();
+}
+
+void CodeGenPGO::destroyRegionCounters() {
+ RegionCounterMap.reset();
+ StmtCountMap.reset();
+ RegionCounts.reset();
+}
+
+/// \brief Calculate what to divide by to scale weights.
+///
+/// Given the maximum weight, calculate a divisor that will scale all the
+/// weights to strictly less than UINT32_MAX.
+static uint64_t calculateWeightScale(uint64_t MaxWeight) {
+ return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
+}
+
+/// \brief Scale an individual branch weight (and add 1).
+///
+/// Scale a 64-bit weight down to 32-bits using \c Scale.
+///
+/// According to Laplace's Rule of Succession, it is better to compute the
+/// weight based on the count plus 1, so universally add 1 to the value.
+///
+/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
+/// greater than \c Weight.
+static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
+ assert(Scale && "scale by 0?");
+ uint64_t Scaled = Weight / Scale + 1;
+ assert(Scaled <= UINT32_MAX && "overflow 32-bits");
+ return Scaled;
+}
+
+llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
+ uint64_t FalseCount) {
+ // Check for empty weights.
+ if (!TrueCount && !FalseCount)
+ return nullptr;
+
+ // Calculate how to scale down to 32-bits.
+ uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
+
+ llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+ return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
+ scaleBranchWeight(FalseCount, Scale));
+}
+
+llvm::MDNode *CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
+ // We need at least two elements to create meaningful weights.
+ if (Weights.size() < 2)
+ return nullptr;
+
+ // Calculate how to scale down to 32-bits.
+ uint64_t Scale = calculateWeightScale(*std::max_element(Weights.begin(),
+ Weights.end()));
+
+ SmallVector<uint32_t, 16> ScaledWeights;
+ ScaledWeights.reserve(Weights.size());
+ for (uint64_t W : Weights)
+ ScaledWeights.push_back(scaleBranchWeight(W, Scale));
+
+ llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+ return MDHelper.createBranchWeights(ScaledWeights);
+}
+
+llvm::MDNode *CodeGenPGO::createLoopWeights(const Stmt *Cond,
+ RegionCounter &Cnt) {
+ if (!haveRegionCounts())
+ return nullptr;
+ uint64_t LoopCount = Cnt.getCount();
+ uint64_t CondCount = 0;
+ bool Found = getStmtCount(Cond, CondCount);
+ assert(Found && "missing expected loop condition count");
+ (void)Found;
+ if (CondCount == 0)
+ return nullptr;
+ return createBranchWeights(LoopCount,
+ std::max(CondCount, LoopCount) - LoopCount);
+}
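Before moving on to the header, a small worked example of the weight scaling above: a scale is chosen so the largest weight fits in 32 bits, each weight is divided by it, and 1 is added per Laplace's rule, which keeps branch ratios roughly intact. The counts below are made up; the two helpers simply repeat the arithmetic of calculateWeightScale() and scaleBranchWeight():

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t calcScale(uint64_t MaxWeight) {
  return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
}

static uint32_t scaleWeight(uint64_t Weight, uint64_t Scale) {
  uint64_t Scaled = Weight / Scale + 1; // +1: Laplace's rule of succession
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
  return (uint32_t)Scaled;
}

int main() {
  uint64_t TrueCount = 6000000000ULL;  // exceeds UINT32_MAX, forcing Scale > 1
  uint64_t FalseCount = 2000000000ULL;
  uint64_t Scale = calcScale(std::max(TrueCount, FalseCount)); // Scale == 2
  printf("%u %u\n", (unsigned)scaleWeight(TrueCount, Scale),
         (unsigned)scaleWeight(FalseCount, Scale));
  // Prints "3000000001 1000000001": both weights fit in 32 bits and the
  // roughly 3:1 ratio between the branches is preserved.
}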
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
new file mode 100644
index 0000000..c59a58e
--- /dev/null
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -0,0 +1,256 @@
+//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Instrumentation-based profile-guided optimization
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_CODEGEN_CODEGENPGO_H
+#define CLANG_CODEGEN_CODEGENPGO_H
+
+#include "CGBuilder.h"
+#include "CodeGenModule.h"
+#include "CodeGenTypes.h"
+#include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+
+namespace clang {
+namespace CodeGen {
+class RegionCounter;
+
+/// The raw counter data from an instrumented PGO binary
+class PGOProfileData {
+private:
+ /// The PGO data
+ std::unique_ptr<llvm::MemoryBuffer> DataBuffer;
+ /// Offsets into DataBuffer for each function's counters
+ llvm::StringMap<unsigned> DataOffsets;
+ /// Execution counts for each function.
+ llvm::StringMap<uint64_t> FunctionCounts;
+ /// The maximal execution count among all functions.
+ uint64_t MaxFunctionCount;
+ CodeGenModule &CGM;
+public:
+ PGOProfileData(CodeGenModule &CGM, std::string Path);
+ /// Fill Counts with the profile data for the given function name. Returns
+ /// false on success.
+ bool getFunctionCounts(StringRef FuncName, uint64_t &FuncHash,
+ std::vector<uint64_t> &Counts);
+ /// Return the maximum of all known function counts.
+ uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
+};
+
+/// Per-function PGO state. This class should generally not be used directly,
+/// but instead through the CodeGenFunction and RegionCounter types.
+class CodeGenPGO {
+private:
+ CodeGenModule &CGM;
+ std::unique_ptr<std::string> PrefixedFuncName;
+ StringRef RawFuncName;
+ llvm::GlobalValue::LinkageTypes VarLinkage;
+
+ unsigned NumRegionCounters;
+ uint64_t FunctionHash;
+ llvm::GlobalVariable *RegionCounters;
+ std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap;
+ std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap;
+ std::unique_ptr<std::vector<uint64_t>> RegionCounts;
+ uint64_t CurrentRegionCount;
+
+public:
+ CodeGenPGO(CodeGenModule &CGM)
+ : CGM(CGM), NumRegionCounters(0), FunctionHash(0), RegionCounters(0),
+ CurrentRegionCount(0) {}
+
+ /// Whether or not we have PGO region data for the current function. This is
+ /// false both when we have no data at all and when our data has been
+ /// discarded.
+ bool haveRegionCounts() const { return RegionCounts != 0; }
+
+ /// Get the string used to identify this function in the profile data.
+ /// For functions with local linkage, this includes the main file name.
+ StringRef getFuncName() const { return StringRef(*PrefixedFuncName); }
+ std::string getFuncVarName(StringRef VarName) const {
+ return ("__llvm_profile_" + VarName + "_" + RawFuncName).str();
+ }
+
+ /// Return the counter value of the current region.
+ uint64_t getCurrentRegionCount() const { return CurrentRegionCount; }
+
+ /// Set the counter value for the current region. This is used to keep track
+ /// of changes to the most recent counter from control flow and non-local
+ /// exits.
+ void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; }
+
+ /// Indicate that the current region is never reached, and thus should have a
+ /// counter value of zero. This is important so that subsequent regions can
+ /// correctly track their parent counts.
+ void setCurrentRegionUnreachable() { setCurrentRegionCount(0); }
+
+ /// Check if an execution count is known for a given statement. If so, return
+ /// true and put the value in Count; else return false.
+ bool getStmtCount(const Stmt *S, uint64_t &Count) {
+ if (!StmtCountMap)
+ return false;
+ llvm::DenseMap<const Stmt*, uint64_t>::const_iterator
+ I = StmtCountMap->find(S);
+ if (I == StmtCountMap->end())
+ return false;
+ Count = I->second;
+ return true;
+ }
+
+ /// If the execution count for the current statement is known, record that
+ /// as the current count.
+ void setCurrentStmt(const Stmt *S) {
+ uint64_t Count;
+ if (getStmtCount(S, Count))
+ setCurrentRegionCount(Count);
+ }
+
+ /// Calculate branch weights appropriate for PGO data
+ llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount);
+ llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights);
+ llvm::MDNode *createLoopWeights(const Stmt *Cond, RegionCounter &Cnt);
+
+ /// Assign counters to regions and configure them for PGO of a given
+ /// function. Does nothing if instrumentation is not enabled and no profile
+ /// data is available. Otherwise, either generates global variables or
+ /// associates PGO data with each of the counters, depending on whether we
+ /// are generating or using instrumentation.
+ void assignRegionCounters(const Decl *D, llvm::Function *Fn);
+ /// Emit static data structures for instrumentation data.
+ void emitInstrumentationData();
+ /// Clean up region counter state. Must be called if assignRegionCounters is
+ /// used.
+ void destroyRegionCounters();
+ /// Emit static initialization code, if any.
+ static llvm::Function *emitInitialization(CodeGenModule &CGM);
+
+private:
+ void setFuncName(llvm::Function *Fn);
+ void mapRegionCounters(const Decl *D);
+ void computeRegionCounts(const Decl *D);
+ void applyFunctionAttributes(PGOProfileData *PGOData, llvm::Function *Fn);
+ void loadRegionCounts(PGOProfileData *PGOData);
+ void emitCounterVariables();
+ llvm::GlobalVariable *buildDataVar();
+
+ /// Emit code to increment the counter at the given index
+ void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter);
+
+ /// Return the region counter for the given statement. This should only be
+ /// called on statements that have a dedicated counter.
+ unsigned getRegionCounter(const Stmt *S) {
+ if (RegionCounterMap == 0)
+ return 0;
+ return (*RegionCounterMap)[S];
+ }
+
+ /// Return the region count for the counter at the given index.
+ uint64_t getRegionCount(unsigned Counter) {
+ if (!haveRegionCounts())
+ return 0;
+ return (*RegionCounts)[Counter];
+ }
+
+ friend class RegionCounter;
+};
+
+/// A counter for a particular region. This is the primary interface through
+/// which clients manage PGO counters and their values.
+class RegionCounter {
+ CodeGenPGO *PGO;
+ unsigned Counter;
+ uint64_t Count;
+ uint64_t ParentCount;
+ uint64_t RegionCount;
+ int64_t Adjust;
+
+ RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex)
+ : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)),
+ ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
+
+public:
+ RegionCounter(CodeGenPGO &PGO, const Stmt *S)
+ : PGO(&PGO), Counter(PGO.getRegionCounter(S)),
+ Count(PGO.getRegionCount(Counter)),
+ ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
+
+ /// Get the value of the counter. In most cases this is the number of times
+ /// the region of the counter was entered, but for switch labels it's the
+ /// number of direct jumps to that label.
+ uint64_t getCount() const { return Count; }
+
+ /// Get the value of the counter with adjustments applied. Adjustments occur
+ /// when control enters or leaves the region abnormally; i.e., if there is a
+ /// jump to a label within the region, or if the function can return from
+ /// within the region. The adjusted count, then, is the value of the counter
+ /// at the end of the region.
+ uint64_t getAdjustedCount() const {
+ return Count + Adjust;
+ }
+
+ /// Get the value of the counter in this region's parent, i.e., the region
+ /// that was active when this region began. This is useful for deriving
+ /// counts in implicitly counted regions, like the false case of a condition
+ /// or the normal exits of a loop.
+ uint64_t getParentCount() const { return ParentCount; }
+
+ /// Activate the counter by emitting an increment and starting to track
+ /// adjustments. If AddIncomingFallThrough is true, the current region count
+ /// will be added to the counter for the purposes of tracking the region.
+ void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) {
+ beginRegion(AddIncomingFallThrough);
+ PGO->emitCounterIncrement(Builder, Counter);
+ }
+ void beginRegion(bool AddIncomingFallThrough=false) {
+ RegionCount = Count;
+ if (AddIncomingFallThrough)
+ RegionCount += PGO->getCurrentRegionCount();
+ PGO->setCurrentRegionCount(RegionCount);
+ }
+
+ /// For counters on boolean branches, begins tracking adjustments for the
+ /// uncounted path.
+ void beginElseRegion() {
+ RegionCount = ParentCount - Count;
+ PGO->setCurrentRegionCount(RegionCount);
+ }
+
+ /// Reset the current region count.
+ void setCurrentRegionCount(uint64_t CurrentCount) {
+ RegionCount = CurrentCount;
+ PGO->setCurrentRegionCount(RegionCount);
+ }
+
+ /// Adjust for non-local control flow after emitting a subexpression or
+ /// substatement. This must be called to account for constructs such as gotos,
+ /// labels, and returns, so that we can ensure that our region's count is
+ /// correct in the code that follows.
+ void adjustForControlFlow() {
+ Adjust += PGO->getCurrentRegionCount() - RegionCount;
+ // Reset the region count in case this is called again later.
+ RegionCount = PGO->getCurrentRegionCount();
+ }
+
+ /// Commit all adjustments to the current region. If the region is a loop,
+ /// the LoopAdjust value should be the count of all the breaks and continues
+ /// from the loop, to compensate for those counts being deducted from the
+ /// adjustments for the body of the loop.
+ void applyAdjustmentsToRegion(uint64_t LoopAdjust) {
+ PGO->setCurrentRegionCount(ParentCount + Adjust + LoopAdjust);
+ }
+};
+
+} // end namespace CodeGen
+} // end namespace clang
+
+#endif
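As a closing illustration of the RegionCounter bookkeeping defined above, here is the if/else arithmetic spelled out as a standalone model; the counts 100 and 70 are invented, and the code only mirrors the class's math rather than using it:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t ParentCount = 100; // entries into the enclosing region
  uint64_t ThenCount = 70;    // value of the counter attached to the 'then'

  uint64_t ThenRegion = ThenCount;               // beginRegion()
  uint64_t ElseRegion = ParentCount - ThenCount; // beginElseRegion() -> 30

  // With no breaks, returns, or gotos inside either branch the adjustments
  // are zero, so applyAdjustmentsToRegion(0) restores the parent count.
  uint64_t AfterIf = ParentCount + 0;

  printf("then=%llu else=%llu after=%llu\n",
         (unsigned long long)ThenRegion, (unsigned long long)ElseRegion,
         (unsigned long long)AfterIf); // then=70 else=30 after=100
}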
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 5f3c59c..b7f3746 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -38,10 +38,7 @@
}
CodeGenTypes::~CodeGenTypes() {
- for (llvm::DenseMap<const Type *, CGRecordLayout *>::iterator
- I = CGRecordLayouts.begin(), E = CGRecordLayouts.end();
- I != E; ++I)
- delete I->second;
+ llvm::DeleteContainerSeconds(CGRecordLayouts);
for (llvm::FoldingSet<CGFunctionInfo>::iterator
I = FunctionInfos.begin(), E = FunctionInfos.end(); I != E; )
@@ -134,17 +131,15 @@
// when a class is translated, even though they aren't embedded by-value into
// the class.
if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) {
- for (CXXRecordDecl::base_class_const_iterator I = CRD->bases_begin(),
- E = CRD->bases_end(); I != E; ++I)
- if (!isSafeToConvert(I->getType()->getAs<RecordType>()->getDecl(),
+ for (const auto &I : CRD->bases())
+ if (!isSafeToConvert(I.getType()->getAs<RecordType>()->getDecl(),
CGT, AlreadyChecked))
return false;
}
// If this type would require laying out members that are currently being laid
// out, don't do it.
- for (RecordDecl::field_iterator I = RD->field_begin(),
- E = RD->field_end(); I != E; ++I)
+ for (const auto *I : RD->fields())
if (!isSafeToConvert(I->getType(), CGT, AlreadyChecked))
return false;
@@ -186,13 +181,12 @@
return isSafeToConvert(RD, CGT, AlreadyChecked);
}
-
-/// isFuncTypeArgumentConvertible - Return true if the specified type in a
-/// function argument or result position can be converted to an IR type at this
+/// isFuncParamTypeConvertible - Return true if the specified type in a
+/// function parameter or result position can be converted to an IR type at this
/// point. This boils down to being whether it is complete, as well as whether
/// we've temporarily deferred expanding the type because we're in a recursive
/// context.
-bool CodeGenTypes::isFuncTypeArgumentConvertible(QualType Ty) {
+bool CodeGenTypes::isFuncParamTypeConvertible(QualType Ty) {
// If this isn't a tagged type, we can convert it!
const TagType *TT = Ty->getAs<TagType>();
if (TT == 0) return true;
@@ -217,17 +211,17 @@
/// Code to verify a given function type is complete, i.e. the return type
-/// and all of the argument types are complete. Also check to see if we are in
+/// and all of the parameter types are complete. Also check to see if we are in
/// a RS_StructPointer context, and if so whether any struct types have been
/// pended. If so, we don't want to ask the ABI lowering code to handle a type
/// that cannot be converted to an IR type.
bool CodeGenTypes::isFuncTypeConvertible(const FunctionType *FT) {
- if (!isFuncTypeArgumentConvertible(FT->getResultType()))
+ if (!isFuncParamTypeConvertible(FT->getReturnType()))
return false;
if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FT))
- for (unsigned i = 0, e = FPT->getNumArgs(); i != e; i++)
- if (!isFuncTypeArgumentConvertible(FPT->getArgType(i)))
+ for (unsigned i = 0, e = FPT->getNumParams(); i != e; i++)
+ if (!isFuncParamTypeConvertible(FPT->getParamType(i)))
return false;
return true;
@@ -479,11 +473,11 @@
// Force conversion of all the relevant record types, to make sure
// we re-convert the FunctionType when appropriate.
- if (const RecordType *RT = FT->getResultType()->getAs<RecordType>())
+ if (const RecordType *RT = FT->getReturnType()->getAs<RecordType>())
ConvertRecordDeclType(RT->getDecl());
if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FT))
- for (unsigned i = 0, e = FPT->getNumArgs(); i != e; i++)
- if (const RecordType *RT = FPT->getArgType(i)->getAs<RecordType>())
+ for (unsigned i = 0, e = FPT->getNumParams(); i != e; i++)
+ if (const RecordType *RT = FPT->getParamType(i)->getAs<RecordType>())
ConvertRecordDeclType(RT->getDecl());
// Return a placeholder type.
@@ -493,7 +487,7 @@
break;
}
- // While we're converting the argument types for a function, we don't want
+ // While we're converting the parameter types for a function, we don't want
// to recursively convert any pointed-to structs. Converting directly-used
// structs is ok though.
if (!RecordsBeingLaidOut.insert(Ty)) {
@@ -655,11 +649,10 @@
// Force conversion of non-virtual base classes recursively.
if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) {
- for (CXXRecordDecl::base_class_const_iterator i = CRD->bases_begin(),
- e = CRD->bases_end(); i != e; ++i) {
- if (i->isVirtual()) continue;
+ for (const auto &I : CRD->bases()) {
+ if (I.isVirtual()) continue;
- ConvertRecordDeclType(i->getType()->getAs<RecordType>()->getDecl());
+ ConvertRecordDeclType(I.getType()->getAs<RecordType>()->getDecl());
}
}
diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h
index 94ca9e2..59e3089 100644
--- a/lib/CodeGen/CodeGenTypes.h
+++ b/lib/CodeGen/CodeGenTypes.h
@@ -136,8 +136,8 @@
/// be converted to an LLVM type (i.e. doesn't depend on an incomplete tag
/// type).
bool isFuncTypeConvertible(const FunctionType *FT);
- bool isFuncTypeArgumentConvertible(QualType Ty);
-
+ bool isFuncParamTypeConvertible(QualType Ty);
+
/// GetFunctionTypeForVTable - Get the LLVM function type for use in a vtable,
/// given a CXXMethodDecl. If the method to has an incomplete return type,
/// and/or incomplete argument types, this will return the opaque type.
@@ -175,10 +175,10 @@
const CGFunctionInfo &arrangeGlobalDeclaration(GlobalDecl GD);
const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD);
- const CGFunctionInfo &arrangeFunctionDeclaration(QualType ResTy,
- const FunctionArgList &Args,
- const FunctionType::ExtInfo &Info,
- bool isVariadic);
+ const CGFunctionInfo &
+ arrangeFreeFunctionDeclaration(QualType ResTy, const FunctionArgList &Args,
+ const FunctionType::ExtInfo &Info,
+ bool isVariadic);
const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD);
const CGFunctionInfo &arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
@@ -188,6 +188,10 @@
const CGFunctionInfo &arrangeCXXConstructorDeclaration(
const CXXConstructorDecl *D,
CXXCtorType Type);
+ const CGFunctionInfo &arrangeCXXConstructorCall(const CallArgList &Args,
+ const CXXConstructorDecl *D,
+ CXXCtorType CtorKind,
+ unsigned ExtraArgs);
const CGFunctionInfo &arrangeCXXDestructor(const CXXDestructorDecl *D,
CXXDtorType Type);
@@ -216,6 +220,7 @@
///
/// \param argTypes - must all actually be canonical as params
const CGFunctionInfo &arrangeLLVMFunctionInfo(CanQualType returnType,
+ bool IsInstanceMethod,
ArrayRef<CanQualType> argTypes,
FunctionType::ExtInfo info,
RequiredArgs args);
diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h
index e9d9a33..166d420 100644
--- a/lib/CodeGen/EHScopeStack.h
+++ b/lib/CodeGen/EHScopeStack.h
@@ -19,8 +19,8 @@
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Value.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
namespace clang {
namespace CodeGen {
@@ -65,9 +65,9 @@
template <class T> struct DominatingValue : InvariantValue<T> {};
template <class T, bool mightBeInstruction =
- llvm::is_base_of<llvm::Value, T>::value &&
- !llvm::is_base_of<llvm::Constant, T>::value &&
- !llvm::is_base_of<llvm::BasicBlock, T>::value>
+ std::is_base_of<llvm::Value, T>::value &&
+ !std::is_base_of<llvm::Constant, T>::value &&
+ !std::is_base_of<llvm::BasicBlock, T>::value>
struct DominatingPointer;
template <class T> struct DominatingPointer<T,false> : InvariantValue<T*> {};
// template <class T> struct DominatingPointer<T,true> at end of file
@@ -182,7 +182,7 @@
typedef typename DominatingValue<A0>::saved_type A0_saved;
A0_saved a0_saved;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved);
T(a0).Emit(CGF, flags);
}
@@ -199,7 +199,7 @@
A0_saved a0_saved;
A1_saved a1_saved;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved);
A1 a1 = DominatingValue<A1>::restore(CGF, a1_saved);
T(a0, a1).Emit(CGF, flags);
@@ -219,7 +219,7 @@
A1_saved a1_saved;
A2_saved a2_saved;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved);
A1 a1 = DominatingValue<A1>::restore(CGF, a1_saved);
A2 a2 = DominatingValue<A2>::restore(CGF, a2_saved);
@@ -242,7 +242,7 @@
A2_saved a2_saved;
A3_saved a3_saved;
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved);
A1 a1 = DominatingValue<A1>::restore(CGF, a1_saved);
A2 a2 = DominatingValue<A2>::restore(CGF, a2_saved);
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index 0e8f31a..27825ab 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -52,13 +52,13 @@
CGCXXABI(CGM), UseARMMethodPtrABI(UseARMMethodPtrABI),
UseARMGuardVarABI(UseARMGuardVarABI) { }
- bool isReturnTypeIndirect(const CXXRecordDecl *RD) const {
+ bool isReturnTypeIndirect(const CXXRecordDecl *RD) const override {
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always indirect.
return !RD->hasTrivialDestructor() || RD->hasNonTrivialCopyConstructor();
}
- RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const {
+ RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override {
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always indirect.
if (!RD->hasTrivialDestructor() || RD->hasNonTrivialCopyConstructor())
@@ -66,114 +66,117 @@
return RAA_Default;
}
- bool isZeroInitializable(const MemberPointerType *MPT);
+ bool isZeroInitializable(const MemberPointerType *MPT) override;
- llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT);
+ llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override;
- llvm::Value *EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF,
- llvm::Value *&This,
- llvm::Value *MemFnPtr,
- const MemberPointerType *MPT);
+ llvm::Value *
+ EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF,
+ const Expr *E,
+ llvm::Value *&This,
+ llvm::Value *MemFnPtr,
+ const MemberPointerType *MPT) override;
- llvm::Value *EmitMemberDataPointerAddress(CodeGenFunction &CGF,
- llvm::Value *Base,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT);
+ llvm::Value *
+ EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
+ llvm::Value *Base,
+ llvm::Value *MemPtr,
+ const MemberPointerType *MPT) override;
llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
- llvm::Value *Src);
+ llvm::Value *Src) override;
llvm::Constant *EmitMemberPointerConversion(const CastExpr *E,
- llvm::Constant *Src);
+ llvm::Constant *Src) override;
- llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT);
+ llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT) override;
- llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD);
+ llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD) override;
llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
- CharUnits offset);
- llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT);
+ CharUnits offset) override;
+ llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override;
llvm::Constant *BuildMemberPointer(const CXXMethodDecl *MD,
CharUnits ThisAdjustment);
llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF,
- llvm::Value *L,
- llvm::Value *R,
+ llvm::Value *L, llvm::Value *R,
const MemberPointerType *MPT,
- bool Inequality);
+ bool Inequality) override;
llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
- llvm::Value *Addr,
- const MemberPointerType *MPT);
+ llvm::Value *Addr,
+ const MemberPointerType *MPT) override;
- llvm::Value *adjustToCompleteObject(CodeGenFunction &CGF,
- llvm::Value *ptr,
- QualType type);
+ llvm::Value *adjustToCompleteObject(CodeGenFunction &CGF, llvm::Value *ptr,
+ QualType type) override;
- llvm::Value *GetVirtualBaseClassOffset(CodeGenFunction &CGF,
- llvm::Value *This,
- const CXXRecordDecl *ClassDecl,
- const CXXRecordDecl *BaseClassDecl);
+ llvm::Value *
+ GetVirtualBaseClassOffset(CodeGenFunction &CGF, llvm::Value *This,
+ const CXXRecordDecl *ClassDecl,
+ const CXXRecordDecl *BaseClassDecl) override;
void BuildConstructorSignature(const CXXConstructorDecl *Ctor,
- CXXCtorType T,
- CanQualType &ResTy,
- SmallVectorImpl<CanQualType> &ArgTys);
+ CXXCtorType T, CanQualType &ResTy,
+ SmallVectorImpl<CanQualType> &ArgTys) override;
- void EmitCXXConstructors(const CXXConstructorDecl *D);
+ void EmitCXXConstructors(const CXXConstructorDecl *D) override;
void BuildDestructorSignature(const CXXDestructorDecl *Dtor,
- CXXDtorType T,
- CanQualType &ResTy,
- SmallVectorImpl<CanQualType> &ArgTys);
+ CXXDtorType T, CanQualType &ResTy,
+ SmallVectorImpl<CanQualType> &ArgTys) override;
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
- CXXDtorType DT) const {
+ CXXDtorType DT) const override {
// Itanium does not emit any destructor variant as an inline thunk.
// Delegating may occur as an optimization, but all variants are either
// emitted with external linkage or as linkonce if they are inline and used.
return false;
}
- void EmitCXXDestructors(const CXXDestructorDecl *D);
+ void EmitCXXDestructors(const CXXDestructorDecl *D) override;
- void BuildInstanceFunctionParams(CodeGenFunction &CGF,
- QualType &ResTy,
- FunctionArgList &Params);
+ void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy,
+ FunctionArgList &Params) override;
- void EmitInstanceFunctionProlog(CodeGenFunction &CGF);
+ void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
- void EmitConstructorCall(CodeGenFunction &CGF,
- const CXXConstructorDecl *D, CXXCtorType Type,
- bool ForVirtualBase, bool Delegating,
- llvm::Value *This,
- CallExpr::const_arg_iterator ArgBeg,
- CallExpr::const_arg_iterator ArgEnd);
+ unsigned addImplicitConstructorArgs(CodeGenFunction &CGF,
+ const CXXConstructorDecl *D,
+ CXXCtorType Type, bool ForVirtualBase,
+ bool Delegating,
+ CallArgList &Args) override;
- void emitVTableDefinitions(CodeGenVTables &CGVT, const CXXRecordDecl *RD);
+ void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
+ CXXDtorType Type, bool ForVirtualBase,
+ bool Delegating, llvm::Value *This) override;
+
+ void emitVTableDefinitions(CodeGenVTables &CGVT,
+ const CXXRecordDecl *RD) override;
llvm::Value *getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass,
BaseSubobject Base, const CXXRecordDecl *NearestVBase,
- bool &NeedsVirtualOffset);
+ bool &NeedsVirtualOffset) override;
llvm::Constant *
getVTableAddressPointForConstExpr(BaseSubobject Base,
- const CXXRecordDecl *VTableClass);
+ const CXXRecordDecl *VTableClass) override;
llvm::GlobalVariable *getAddrOfVTable(const CXXRecordDecl *RD,
- CharUnits VPtrOffset);
+ CharUnits VPtrOffset) override;
llvm::Value *getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD,
- llvm::Value *This, llvm::Type *Ty);
+ llvm::Value *This,
+ llvm::Type *Ty) override;
void EmitVirtualDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *Dtor,
CXXDtorType DtorType, SourceLocation CallLoc,
- llvm::Value *This);
+ llvm::Value *This) override;
- void emitVirtualInheritanceTables(const CXXRecordDecl *RD);
+ void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
- void setThunkLinkage(llvm::Function *Thunk, bool ForVTable) {
+ void setThunkLinkage(llvm::Function *Thunk, bool ForVTable) override {
// Allow inlining of thunks by emitting them with available_externally
// linkage together with vtables when needed.
if (ForVTable)
@@ -181,38 +184,40 @@
}
llvm::Value *performThisAdjustment(CodeGenFunction &CGF, llvm::Value *This,
- const ThisAdjustment &TA);
+ const ThisAdjustment &TA) override;
llvm::Value *performReturnAdjustment(CodeGenFunction &CGF, llvm::Value *Ret,
- const ReturnAdjustment &RA);
+ const ReturnAdjustment &RA) override;
- StringRef GetPureVirtualCallName() { return "__cxa_pure_virtual"; }
- StringRef GetDeletedVirtualCallName() { return "__cxa_deleted_virtual"; }
+ StringRef GetPureVirtualCallName() override { return "__cxa_pure_virtual"; }
+ StringRef GetDeletedVirtualCallName() override
+ { return "__cxa_deleted_virtual"; }
- CharUnits getArrayCookieSizeImpl(QualType elementType);
+ CharUnits getArrayCookieSizeImpl(QualType elementType) override;
llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF,
llvm::Value *NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
- QualType ElementType);
+ QualType ElementType) override;
llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF,
llvm::Value *allocPtr,
- CharUnits cookieSize);
+ CharUnits cookieSize) override;
void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
- llvm::GlobalVariable *DeclPtr, bool PerformInit);
+ llvm::GlobalVariable *DeclPtr,
+ bool PerformInit) override;
void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
- llvm::Constant *dtor, llvm::Constant *addr);
+ llvm::Constant *dtor, llvm::Constant *addr) override;
llvm::Function *getOrCreateThreadLocalWrapper(const VarDecl *VD,
llvm::GlobalVariable *Var);
void EmitThreadLocalInitFuncs(
llvm::ArrayRef<std::pair<const VarDecl *, llvm::GlobalVariable *> > Decls,
- llvm::Function *InitFunc);
- LValue EmitThreadLocalDeclRefExpr(CodeGenFunction &CGF,
- const DeclRefExpr *DRE);
+ llvm::Function *InitFunc) override;
+ LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
+ QualType LValType) override;
- bool NeedsVTTParameter(GlobalDecl GD);
+ bool NeedsVTTParameter(GlobalDecl GD) override;
};
class ARMCXXABI : public ItaniumCXXABI {
@@ -221,22 +226,31 @@
ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
/* UseARMGuardVarABI = */ true) {}
- bool HasThisReturn(GlobalDecl GD) const {
+ bool HasThisReturn(GlobalDecl GD) const override {
return (isa<CXXConstructorDecl>(GD.getDecl()) || (
isa<CXXDestructorDecl>(GD.getDecl()) &&
GD.getDtorType() != Dtor_Deleting));
}
- void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV, QualType ResTy);
+ void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV,
+ QualType ResTy) override;
- CharUnits getArrayCookieSizeImpl(QualType elementType);
+ CharUnits getArrayCookieSizeImpl(QualType elementType) override;
llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF,
llvm::Value *NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
- QualType ElementType);
+ QualType ElementType) override;
llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, llvm::Value *allocPtr,
- CharUnits cookieSize);
+ CharUnits cookieSize) override;
+};
+
+class iOS64CXXABI : public ARMCXXABI {
+public:
+ iOS64CXXABI(CodeGen::CodeGenModule &CGM) : ARMCXXABI(CGM) {}
+
+ // ARM64 libraries are prepared for non-unique RTTI.
+ bool shouldRTTIBeUnique() override { return false; }
};
}
@@ -248,6 +262,9 @@
case TargetCXXABI::iOS:
return new ARMCXXABI(CGM);
+ case TargetCXXABI::iOS64:
+ return new iOS64CXXABI(CGM);
+
// Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't
// include the other 32-bit ARM oddities: constructor/destructor return values
// and array cookies.
@@ -299,11 +316,9 @@
///
/// If the member is non-virtual, memptr.ptr is the address of
/// the function to call.
-llvm::Value *
-ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF,
- llvm::Value *&This,
- llvm::Value *MemFnPtr,
- const MemberPointerType *MPT) {
+llvm::Value *ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
+ CodeGenFunction &CGF, const Expr *E, llvm::Value *&This,
+ llvm::Value *MemFnPtr, const MemberPointerType *MPT) {
CGBuilderTy &Builder = CGF.Builder;
const FunctionProtoType *FPT =
@@ -355,8 +370,7 @@
// Cast the adjusted this to a pointer to vtable pointer and load.
llvm::Type *VTableTy = Builder.getInt8PtrTy();
- llvm::Value *VTable = Builder.CreateBitCast(This, VTableTy->getPointerTo());
- VTable = Builder.CreateLoad(VTable, "memptr.vtable");
+ llvm::Value *VTable = CGF.GetVTablePtr(This, VTableTy);
// Apply the offset.
llvm::Value *VTableOffset = FnAsInt;
@@ -385,10 +399,9 @@
/// Compute an l-value by applying the given pointer-to-member to a
/// base object.
-llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF,
- llvm::Value *Base,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT) {
+llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress(
+ CodeGenFunction &CGF, const Expr *E, llvm::Value *Base, llvm::Value *MemPtr,
+ const MemberPointerType *MPT) {
assert(MemPtr->getType() == CGM.PtrDiffTy);
CGBuilderTy &Builder = CGF.Builder;
@@ -797,34 +810,35 @@
/// The generic ABI passes 'this', plus a VTT if it's initializing a
/// base subobject.
-void ItaniumCXXABI::BuildConstructorSignature(const CXXConstructorDecl *Ctor,
- CXXCtorType Type,
- CanQualType &ResTy,
- SmallVectorImpl<CanQualType> &ArgTys) {
+void
+ItaniumCXXABI::BuildConstructorSignature(const CXXConstructorDecl *Ctor,
+ CXXCtorType Type, CanQualType &ResTy,
+ SmallVectorImpl<CanQualType> &ArgTys) {
ASTContext &Context = getContext();
- // 'this' parameter is already there, as well as 'this' return if
- // HasThisReturn(GlobalDecl(Ctor, Type)) is true
+ // All parameters are already in place except VTT, which goes after 'this'.
+ // These are Clang types, so we don't need to worry about sret yet.
// Check if we need to add a VTT parameter (which has type void **).
if (Type == Ctor_Base && Ctor->getParent()->getNumVBases() != 0)
- ArgTys.push_back(Context.getPointerType(Context.VoidPtrTy));
+ ArgTys.insert(ArgTys.begin() + 1,
+ Context.getPointerType(Context.VoidPtrTy));
}
void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
// Just make sure we're in sync with TargetCXXABI.
assert(CGM.getTarget().getCXXABI().hasConstructorVariants());
+ // The constructor used for constructing this as a base class;
+ // ignores virtual bases.
+ CGM.EmitGlobal(GlobalDecl(D, Ctor_Base));
+
// The constructor used for constructing this as a complete class;
// constucts the virtual bases, then calls the base constructor.
if (!D->getParent()->isAbstract()) {
// We don't need to emit the complete ctor if the class is abstract.
CGM.EmitGlobal(GlobalDecl(D, Ctor_Complete));
}
-
- // The constructor used for constructing this as a base class;
- // ignores virtual bases.
- CGM.EmitGlobal(GlobalDecl(D, Ctor_Base));
}
/// The generic ABI passes 'this', plus a VTT if it's destroying a
@@ -844,29 +858,26 @@
}
void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
- // The destructor in a virtual table is always a 'deleting'
- // destructor, which calls the complete destructor and then uses the
- // appropriate operator delete.
- if (D->isVirtual())
- CGM.EmitGlobal(GlobalDecl(D, Dtor_Deleting));
+ // The destructor used for destructing this as a base class; ignores
+ // virtual bases.
+ CGM.EmitGlobal(GlobalDecl(D, Dtor_Base));
// The destructor used for destructing this as a most-derived class;
// call the base destructor and then destructs any virtual bases.
CGM.EmitGlobal(GlobalDecl(D, Dtor_Complete));
- // The destructor used for destructing this as a base class; ignores
- // virtual bases.
- CGM.EmitGlobal(GlobalDecl(D, Dtor_Base));
+ // The destructor in a virtual table is always a 'deleting'
+ // destructor, which calls the complete destructor and then uses the
+ // appropriate operator delete.
+ if (D->isVirtual())
+ CGM.EmitGlobal(GlobalDecl(D, Dtor_Deleting));
}
-void ItaniumCXXABI::BuildInstanceFunctionParams(CodeGenFunction &CGF,
- QualType &ResTy,
- FunctionArgList &Params) {
- /// Create the 'this' variable.
- BuildThisParam(CGF, Params);
-
+void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
+ QualType &ResTy,
+ FunctionArgList &Params) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
- assert(MD->isInstance());
+ assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD));
// Check if we need a VTT parameter as well.
if (NeedsVTTParameter(CGF.CurGD)) {
@@ -877,8 +888,8 @@
ImplicitParamDecl *VTTDecl
= ImplicitParamDecl::Create(Context, 0, MD->getLocation(),
&Context.Idents.get("vtt"), T);
- Params.push_back(VTTDecl);
- getVTTDecl(CGF) = VTTDecl;
+ Params.insert(Params.begin() + 1, VTTDecl);
+ getStructorImplicitParamDecl(CGF) = VTTDecl;
}
}
@@ -887,10 +898,9 @@
EmitThisParam(CGF);
/// Initialize the 'vtt' slot if needed.
- if (getVTTDecl(CGF)) {
- getVTTValue(CGF)
- = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(getVTTDecl(CGF)),
- "vtt");
+ if (getStructorImplicitParamDecl(CGF)) {
+ getStructorImplicitParamValue(CGF) = CGF.Builder.CreateLoad(
+ CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)), "vtt");
}
/// If this is a function that the ABI specifies returns 'this', initialize
@@ -905,21 +915,39 @@
CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue);
}
-void ItaniumCXXABI::EmitConstructorCall(CodeGenFunction &CGF,
- const CXXConstructorDecl *D,
- CXXCtorType Type,
- bool ForVirtualBase, bool Delegating,
- llvm::Value *This,
- CallExpr::const_arg_iterator ArgBeg,
- CallExpr::const_arg_iterator ArgEnd) {
- llvm::Value *VTT = CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase,
- Delegating);
+unsigned ItaniumCXXABI::addImplicitConstructorArgs(
+ CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
+ bool ForVirtualBase, bool Delegating, CallArgList &Args) {
+ if (!NeedsVTTParameter(GlobalDecl(D, Type)))
+ return 0;
+
+ // Insert the implicit 'vtt' argument as the second argument.
+ llvm::Value *VTT =
+ CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating);
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
- llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(D, Type);
+ Args.insert(Args.begin() + 1,
+ CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false));
+ return 1; // Added one arg.
+}
+
+void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
+ const CXXDestructorDecl *DD,
+ CXXDtorType Type, bool ForVirtualBase,
+ bool Delegating, llvm::Value *This) {
+ GlobalDecl GD(DD, Type);
+ llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating);
+ QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
+
+ llvm::Value *Callee = 0;
+ if (getContext().getLangOpts().AppleKext)
+ Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent());
+
+ if (!Callee)
+ Callee = CGM.GetAddrOfCXXDestructor(DD, Type);
// FIXME: Provide a source location here.
- CGF.EmitCXXMemberCall(D, SourceLocation(), Callee, ReturnValueSlot(),
- This, VTT, VTTTy, ArgBeg, ArgEnd);
+ CGF.EmitCXXMemberCall(DD, SourceLocation(), Callee, ReturnValueSlot(), This,
+ VTT, VTTTy, 0, 0);
}
void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -942,7 +970,7 @@
VTable->setLinkage(Linkage);
// Set the right visibility.
- CGM.setTypeVisibility(VTable, RD, CodeGenModule::TVK_ForVTable);
+ CGM.setGlobalVisibility(VTable, RD);
// If this is the magic class __cxxabiv1::__fundamental_type_info,
// we will emit the typeinfo for the fundamental types. This is the
@@ -1305,7 +1333,7 @@
llvm::GlobalVariable *Guard;
CallGuardAbort(llvm::GlobalVariable *Guard) : Guard(Guard) {}
- void Emit(CodeGenFunction &CGF, Flags flags) {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitNounwindRuntimeCall(getGuardAbortFn(CGF.CGM, Guard->getType()),
Guard);
}
@@ -1398,6 +1426,13 @@
// __cxa_guard_release (&obj_guard);
// }
// }
+
+ // ARM64 C++ ABI 3.2.2:
+ // This ABI instead only specifies the value bit 0 of the static guard
+ // variable; all other bits are platform defined. Bit 0 shall be 0 when the
+ // variable is not initialized and 1 when it is.
+ // FIXME: Reading one bit is no more efficient than reading one byte, so
+ // the codegen is the same as the generic Itanium ABI.
} else {
// Load the first byte of the guard variable.
llvm::LoadInst *LI =
@@ -1523,8 +1558,6 @@
/// the wrapper emits a copy, and we want the linker to merge them.
static llvm::GlobalValue::LinkageTypes getThreadLocalWrapperLinkage(
llvm::GlobalValue::LinkageTypes VarLinkage) {
- if (llvm::GlobalValue::isLinkerPrivateLinkage(VarLinkage))
- return llvm::GlobalValue::LinkerPrivateWeakLinkage;
// For internal linkage variables, we don't need an external or weak wrapper.
if (llvm::GlobalValue::isLocalLinkage(VarLinkage))
return VarLinkage;
@@ -1632,9 +1665,9 @@
}
}
-LValue ItaniumCXXABI::EmitThreadLocalDeclRefExpr(CodeGenFunction &CGF,
- const DeclRefExpr *DRE) {
- const VarDecl *VD = cast<VarDecl>(DRE->getDecl());
+LValue ItaniumCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF,
+ const VarDecl *VD,
+ QualType LValType) {
QualType T = VD->getType();
llvm::Type *Ty = CGF.getTypes().ConvertTypeForMem(T);
llvm::Value *Val = CGF.CGM.GetAddrOfGlobalVar(VD, Ty);
@@ -1645,10 +1678,9 @@
LValue LV;
if (VD->getType()->isReferenceType())
- LV = CGF.MakeNaturalAlignAddrLValue(Val, T);
+ LV = CGF.MakeNaturalAlignAddrLValue(Val, LValType);
else
- LV = CGF.MakeAddrLValue(Val, DRE->getType(),
- CGF.getContext().getDeclAlign(VD));
+ LV = CGF.MakeAddrLValue(Val, LValType, CGF.getContext().getDeclAlign(VD));
// FIXME: need setObjCGCLValueClass?
return LV;
}
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 7318fe7..9832969 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -15,9 +15,8 @@
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
-#include "CodeGenModule.h"
#include "CGVTables.h"
-#include "MicrosoftVBTables.h"
+#include "CodeGenModule.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/VTableBuilder.h"
@@ -28,51 +27,63 @@
namespace {
+/// Holds all the vbtable globals for a given class.
+struct VBTableGlobals {
+ const VPtrInfoVector *VBTables;
+ SmallVector<llvm::GlobalVariable *, 2> Globals;
+};
+
class MicrosoftCXXABI : public CGCXXABI {
public:
MicrosoftCXXABI(CodeGenModule &CGM) : CGCXXABI(CGM) {}
- bool HasThisReturn(GlobalDecl GD) const;
+ bool HasThisReturn(GlobalDecl GD) const override;
- bool isReturnTypeIndirect(const CXXRecordDecl *RD) const {
+ bool isReturnTypeIndirect(const CXXRecordDecl *RD) const override {
// Structures that are not C++03 PODs are always indirect.
return !RD->isPOD();
}
- RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const {
- if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialDestructor())
- return RAA_DirectInMemory;
+ RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override {
+ if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialDestructor()) {
+ llvm::Triple::ArchType Arch = CGM.getTarget().getTriple().getArch();
+ if (Arch == llvm::Triple::x86)
+ return RAA_DirectInMemory;
+ // On x64, pass non-trivial records indirectly.
+ // FIXME: Test other Windows architectures.
+ return RAA_Indirect;
+ }
return RAA_Default;
}
- StringRef GetPureVirtualCallName() { return "_purecall"; }
+ StringRef GetPureVirtualCallName() override { return "_purecall"; }
// No known support for deleted functions in MSVC yet, so this choice is
// arbitrary.
- StringRef GetDeletedVirtualCallName() { return "_purecall"; }
+ StringRef GetDeletedVirtualCallName() override { return "_purecall"; }
- bool isInlineInitializedStaticDataMemberLinkOnce() { return true; }
+ bool isInlineInitializedStaticDataMemberLinkOnce() override { return true; }
llvm::Value *adjustToCompleteObject(CodeGenFunction &CGF,
llvm::Value *ptr,
- QualType type);
+ QualType type) override;
- llvm::Value *GetVirtualBaseClassOffset(CodeGenFunction &CGF,
- llvm::Value *This,
- const CXXRecordDecl *ClassDecl,
- const CXXRecordDecl *BaseClassDecl);
+ llvm::Value *
+ GetVirtualBaseClassOffset(CodeGenFunction &CGF, llvm::Value *This,
+ const CXXRecordDecl *ClassDecl,
+ const CXXRecordDecl *BaseClassDecl) override;
void BuildConstructorSignature(const CXXConstructorDecl *Ctor,
- CXXCtorType Type,
- CanQualType &ResTy,
- SmallVectorImpl<CanQualType> &ArgTys);
+ CXXCtorType Type, CanQualType &ResTy,
+ SmallVectorImpl<CanQualType> &ArgTys) override;
- llvm::BasicBlock *EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
- const CXXRecordDecl *RD);
+ llvm::BasicBlock *
+ EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
+ const CXXRecordDecl *RD) override;
void initializeHiddenVirtualInheritanceMembers(CodeGenFunction &CGF,
- const CXXRecordDecl *RD);
+ const CXXRecordDecl *RD) override;
- void EmitCXXConstructors(const CXXConstructorDecl *D);
+ void EmitCXXConstructors(const CXXConstructorDecl *D) override;
// Background on MSVC destructors
// ==============================
@@ -109,17 +120,18 @@
void BuildDestructorSignature(const CXXDestructorDecl *Dtor,
CXXDtorType Type,
CanQualType &ResTy,
- SmallVectorImpl<CanQualType> &ArgTys);
+ SmallVectorImpl<CanQualType> &ArgTys) override;
/// Non-base dtors should be emitted as delegating thunks in this ABI.
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
- CXXDtorType DT) const {
+ CXXDtorType DT) const override {
return DT != Dtor_Base;
}
- void EmitCXXDestructors(const CXXDestructorDecl *D);
+ void EmitCXXDestructors(const CXXDestructorDecl *D) override;
- const CXXRecordDecl *getThisArgumentTypeForMethod(const CXXMethodDecl *MD) {
+ const CXXRecordDecl *
+ getThisArgumentTypeForMethod(const CXXMethodDecl *MD) override {
MD = MD->getCanonicalDecl();
if (MD->isVirtual() && !isa<CXXDestructorDecl>(MD)) {
MicrosoftVTableContext::MethodVFTableLocation ML =
@@ -136,71 +148,83 @@
return MD->getParent();
}
- llvm::Value *adjustThisArgumentForVirtualCall(CodeGenFunction &CGF,
- GlobalDecl GD,
- llvm::Value *This);
+ llvm::Value *
+ adjustThisArgumentForVirtualFunctionCall(CodeGenFunction &CGF, GlobalDecl GD,
+ llvm::Value *This,
+ bool VirtualCall) override;
- void BuildInstanceFunctionParams(CodeGenFunction &CGF,
- QualType &ResTy,
- FunctionArgList &Params);
+ void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy,
+ FunctionArgList &Params) override;
llvm::Value *adjustThisParameterInVirtualFunctionPrologue(
- CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This);
+ CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) override;
- void EmitInstanceFunctionProlog(CodeGenFunction &CGF);
+ void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
- void EmitConstructorCall(CodeGenFunction &CGF,
- const CXXConstructorDecl *D, CXXCtorType Type,
- bool ForVirtualBase, bool Delegating,
- llvm::Value *This,
- CallExpr::const_arg_iterator ArgBeg,
- CallExpr::const_arg_iterator ArgEnd);
+ unsigned addImplicitConstructorArgs(CodeGenFunction &CGF,
+ const CXXConstructorDecl *D,
+ CXXCtorType Type, bool ForVirtualBase,
+ bool Delegating,
+ CallArgList &Args) override;
- void emitVTableDefinitions(CodeGenVTables &CGVT, const CXXRecordDecl *RD);
+ void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
+ CXXDtorType Type, bool ForVirtualBase,
+ bool Delegating, llvm::Value *This) override;
+
+ void emitVTableDefinitions(CodeGenVTables &CGVT,
+ const CXXRecordDecl *RD) override;
llvm::Value *getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass,
BaseSubobject Base, const CXXRecordDecl *NearestVBase,
- bool &NeedsVirtualOffset);
+ bool &NeedsVirtualOffset) override;
llvm::Constant *
getVTableAddressPointForConstExpr(BaseSubobject Base,
- const CXXRecordDecl *VTableClass);
+ const CXXRecordDecl *VTableClass) override;
llvm::GlobalVariable *getAddrOfVTable(const CXXRecordDecl *RD,
- CharUnits VPtrOffset);
+ CharUnits VPtrOffset) override;
llvm::Value *getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD,
- llvm::Value *This, llvm::Type *Ty);
+ llvm::Value *This,
+ llvm::Type *Ty) override;
void EmitVirtualDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *Dtor,
CXXDtorType DtorType, SourceLocation CallLoc,
- llvm::Value *This);
+ llvm::Value *This) override;
void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD,
- CallArgList &CallArgs) {
+ CallArgList &CallArgs) override {
assert(GD.getDtorType() == Dtor_Deleting &&
"Only deleting destructor thunks are available in this ABI");
CallArgs.add(RValue::get(getStructorImplicitParamValue(CGF)),
CGM.getContext().IntTy);
}
- void emitVirtualInheritanceTables(const CXXRecordDecl *RD);
+ void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
- void setThunkLinkage(llvm::Function *Thunk, bool ForVTable) {
+ llvm::GlobalVariable *
+ getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
+ llvm::GlobalVariable::LinkageTypes Linkage);
+
+ void emitVBTableDefinition(const VPtrInfo &VBT, const CXXRecordDecl *RD,
+ llvm::GlobalVariable *GV) const;
+
+ void setThunkLinkage(llvm::Function *Thunk, bool ForVTable) override {
Thunk->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
}
llvm::Value *performThisAdjustment(CodeGenFunction &CGF, llvm::Value *This,
- const ThisAdjustment &TA);
+ const ThisAdjustment &TA) override;
llvm::Value *performReturnAdjustment(CodeGenFunction &CGF, llvm::Value *Ret,
- const ReturnAdjustment &RA);
+ const ReturnAdjustment &RA) override;
void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
llvm::GlobalVariable *DeclPtr,
- bool PerformInit);
+ bool PerformInit) override;
// ==== Notes on array cookies =========
//
@@ -225,17 +249,18 @@
// }
// Whereas it prints "104" and "104" if you give A a destructor.
- bool requiresArrayCookie(const CXXDeleteExpr *expr, QualType elementType);
- bool requiresArrayCookie(const CXXNewExpr *expr);
- CharUnits getArrayCookieSizeImpl(QualType type);
+ bool requiresArrayCookie(const CXXDeleteExpr *expr,
+ QualType elementType) override;
+ bool requiresArrayCookie(const CXXNewExpr *expr) override;
+ CharUnits getArrayCookieSizeImpl(QualType type) override;
llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF,
llvm::Value *NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
- QualType ElementType);
+ QualType ElementType) override;
llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF,
llvm::Value *allocPtr,
- CharUnits cookieSize);
+ CharUnits cookieSize) override;
private:
MicrosoftMangleContext &getMangleContext() {
@@ -258,15 +283,12 @@
return C ? C : getZeroInt();
}
+ CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD);
+
void
GetNullMemberPointerFields(const MemberPointerType *MPT,
llvm::SmallVectorImpl<llvm::Constant *> &fields);
- /// \brief Finds the offset from the base of RD to the vbptr it uses, even if
- /// it is reusing a vbptr from a non-virtual base. RD must have morally
- /// virtual bases.
- CharUnits GetVBPtrOffsetFromBases(const CXXRecordDecl *RD);
-
/// \brief Shared code for virtual base adjustment. Returns the offset from
/// the vbptr to the virtual base. Optionally returns the address of the
/// vbptr itself.
@@ -288,8 +310,8 @@
/// \brief Performs a full virtual base adjustment. Used to dereference
/// pointers to members of virtual bases.
- llvm::Value *AdjustVirtualBase(CodeGenFunction &CGF, const CXXRecordDecl *RD,
- llvm::Value *Base,
+ llvm::Value *AdjustVirtualBase(CodeGenFunction &CGF, const Expr *E,
+ const CXXRecordDecl *RD, llvm::Value *Base,
llvm::Value *VirtualBaseAdjustmentOffset,
llvm::Value *VBPtrOffset /* optional */);
@@ -311,51 +333,51 @@
void EmitVBPtrStores(CodeGenFunction &CGF, const CXXRecordDecl *RD);
/// \brief Caching wrapper around VBTableBuilder::enumerateVBTables().
- const VBTableVector &EnumerateVBTables(const CXXRecordDecl *RD);
+ const VBTableGlobals &enumerateVBTables(const CXXRecordDecl *RD);
/// \brief Generate a thunk for calling a virtual member function MD.
- llvm::Function *EmitVirtualMemPtrThunk(const CXXMethodDecl *MD,
- StringRef ThunkName);
+ llvm::Function *EmitVirtualMemPtrThunk(
+ const CXXMethodDecl *MD,
+ const MicrosoftVTableContext::MethodVFTableLocation &ML);
public:
- virtual llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT);
+ llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override;
- virtual bool isZeroInitializable(const MemberPointerType *MPT);
+ bool isZeroInitializable(const MemberPointerType *MPT) override;
- virtual llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT);
+ llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT) override;
- virtual llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
- CharUnits offset);
- virtual llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD);
- virtual llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT);
+ llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
+ CharUnits offset) override;
+ llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD) override;
+ llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override;
- virtual llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF,
- llvm::Value *L,
- llvm::Value *R,
- const MemberPointerType *MPT,
- bool Inequality);
+ llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF,
+ llvm::Value *L,
+ llvm::Value *R,
+ const MemberPointerType *MPT,
+ bool Inequality) override;
- virtual llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT);
+ llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
+ llvm::Value *MemPtr,
+ const MemberPointerType *MPT) override;
- virtual llvm::Value *EmitMemberDataPointerAddress(CodeGenFunction &CGF,
- llvm::Value *Base,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT);
+ llvm::Value *
+ EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
+ llvm::Value *Base, llvm::Value *MemPtr,
+ const MemberPointerType *MPT) override;
- virtual llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF,
- const CastExpr *E,
- llvm::Value *Src);
+ llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF,
+ const CastExpr *E,
+ llvm::Value *Src) override;
- virtual llvm::Constant *EmitMemberPointerConversion(const CastExpr *E,
- llvm::Constant *Src);
+ llvm::Constant *EmitMemberPointerConversion(const CastExpr *E,
+ llvm::Constant *Src) override;
- virtual llvm::Value *
- EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF,
- llvm::Value *&This,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT);
+ llvm::Value *
+ EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, const Expr *E,
+ llvm::Value *&This, llvm::Value *MemPtr,
+ const MemberPointerType *MPT) override;
private:
typedef std::pair<const CXXRecordDecl *, CharUnits> VFTableIdTy;
@@ -368,7 +390,7 @@
/// \brief All the vbtables which have been referenced.
- llvm::DenseMap<const CXXRecordDecl *, VBTableVector> VBTablesMap;
+ llvm::DenseMap<const CXXRecordDecl *, VBTableGlobals> VBTablesMap;
/// Info on the global variable used to guard initialization of static locals.
/// The BitIndex field is only used for externally invisible declarations.
@@ -392,41 +414,13 @@
return ptr;
}
-/// \brief Finds the first non-virtual base of RD that has virtual bases. If RD
-/// doesn't have a vbptr, it will reuse the vbptr of the returned class.
-static const CXXRecordDecl *FindFirstNVBaseWithVBases(const CXXRecordDecl *RD) {
- for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(),
- E = RD->bases_end(); I != E; ++I) {
- const CXXRecordDecl *Base = I->getType()->getAsCXXRecordDecl();
- if (!I->isVirtual() && Base->getNumVBases() > 0)
- return Base;
- }
- llvm_unreachable("RD must have an nv base with vbases");
-}
-
-CharUnits MicrosoftCXXABI::GetVBPtrOffsetFromBases(const CXXRecordDecl *RD) {
- assert(RD->getNumVBases());
- CharUnits Total = CharUnits::Zero();
- while (RD) {
- const ASTRecordLayout &RDLayout = getContext().getASTRecordLayout(RD);
- CharUnits VBPtrOffset = RDLayout.getVBPtrOffset();
- // -1 is the sentinel for no vbptr.
- if (VBPtrOffset != CharUnits::fromQuantity(-1)) {
- Total += VBPtrOffset;
- break;
- }
- RD = FindFirstNVBaseWithVBases(RD);
- Total += RDLayout.getBaseClassOffset(RD);
- }
- return Total;
-}
-
llvm::Value *
MicrosoftCXXABI::GetVirtualBaseClassOffset(CodeGenFunction &CGF,
llvm::Value *This,
const CXXRecordDecl *ClassDecl,
const CXXRecordDecl *BaseClassDecl) {
- int64_t VBPtrChars = GetVBPtrOffsetFromBases(ClassDecl).getQuantity();
+ int64_t VBPtrChars =
+ getContext().getASTRecordLayout(ClassDecl).getVBPtrOffset().getQuantity();
llvm::Value *VBPtrOffset = llvm::ConstantInt::get(CGM.PtrDiffTy, VBPtrChars);
CharUnits IntSize = getContext().getTypeSizeInChars(getContext().IntTy);
CharUnits VBTableChars =
@@ -446,16 +440,20 @@
return isa<CXXConstructorDecl>(GD.getDecl());
}
-void MicrosoftCXXABI::BuildConstructorSignature(const CXXConstructorDecl *Ctor,
- CXXCtorType Type,
- CanQualType &ResTy,
- SmallVectorImpl<CanQualType> &ArgTys) {
- // 'this' parameter and 'this' return are already in place
+void MicrosoftCXXABI::BuildConstructorSignature(
+ const CXXConstructorDecl *Ctor, CXXCtorType Type, CanQualType &ResTy,
+ SmallVectorImpl<CanQualType> &ArgTys) {
+
+  // All parameters are already in place except is_most_derived, which goes
+  // right after 'this' if the constructor is variadic and last if it is not.
const CXXRecordDecl *Class = Ctor->getParent();
+ const FunctionProtoType *FPT = Ctor->getType()->castAs<FunctionProtoType>();
if (Class->getNumVBases()) {
- // Constructors of classes with virtual bases take an implicit parameter.
- ArgTys.push_back(CGM.getContext().IntTy);
+ if (FPT->isVariadic())
+ ArgTys.insert(ArgTys.begin() + 1, CGM.getContext().IntTy);
+ else
+ ArgTys.push_back(CGM.getContext().IntTy);
}
}
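
Illustrative aside (not part of the patch): for a record with virtual bases, the Microsoft ABI passes an implicit 'int is_most_derived' flag to every constructor, and the hunk above decides where it lands. A rough sketch of the lowered signatures, assuming a minimal hierarchy:

    struct A {};
    struct B : virtual A {
      B(int);                  // roughly  B::B(B *this, int, int is_most_derived)
      B(const char *fmt, ...); // roughly  B::B(B *this, int is_most_derived,
                               //               const char *fmt, ...)
    };
    // The flag has to precede the variadic arguments because nothing can follow
    // the ellipsis, which is why it is inserted right after 'this' in that case.
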
@@ -547,19 +545,23 @@
const CXXRecordDecl *RD) {
llvm::Value *ThisInt8Ptr =
CGF.Builder.CreateBitCast(getThisValue(CGF), CGM.Int8PtrTy, "this.int8");
+ const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
- const VBTableVector &VBTables = EnumerateVBTables(RD);
- for (VBTableVector::const_iterator I = VBTables.begin(), E = VBTables.end();
- I != E; ++I) {
+ const VBTableGlobals &VBGlobals = enumerateVBTables(RD);
+ for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) {
+ const VPtrInfo *VBT = (*VBGlobals.VBTables)[I];
+ llvm::GlobalVariable *GV = VBGlobals.Globals[I];
const ASTRecordLayout &SubobjectLayout =
- CGM.getContext().getASTRecordLayout(I->VBPtrSubobject.getBase());
- uint64_t Offs = (I->VBPtrSubobject.getBaseOffset() +
- SubobjectLayout.getVBPtrOffset()).getQuantity();
+ CGM.getContext().getASTRecordLayout(VBT->BaseWithVPtr);
+ CharUnits Offs = VBT->NonVirtualOffset;
+ Offs += SubobjectLayout.getVBPtrOffset();
+ if (VBT->getVBaseWithVPtr())
+ Offs += Layout.getVBaseClassOffset(VBT->getVBaseWithVPtr());
llvm::Value *VBPtr =
- CGF.Builder.CreateConstInBoundsGEP1_64(ThisInt8Ptr, Offs);
- VBPtr = CGF.Builder.CreateBitCast(VBPtr, I->GV->getType()->getPointerTo(0),
- "vbptr." + I->ReusingBase->getName());
- CGF.Builder.CreateStore(I->GV, VBPtr);
+ CGF.Builder.CreateConstInBoundsGEP1_64(ThisInt8Ptr, Offs.getQuantity());
+ VBPtr = CGF.Builder.CreateBitCast(VBPtr, GV->getType()->getPointerTo(0),
+ "vbptr." + VBT->ReusingBase->getName());
+ CGF.Builder.CreateStore(GV, VBPtr);
}
}
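
Illustrative aside (not part of the patch): the loop above stores one vbtable pointer per vbptr in the object, at this + VBT->NonVirtualOffset + the vbptr offset inside BaseWithVPtr, plus the virtual-base offset when the vbptr is inherited from a virtual base. In the simplest case the class owns a single vbptr at offset zero; a hedged sketch of what the constructor ends up doing:

    struct A { int a; };
    struct B : virtual A { int b; };
    // Inside B's constructor, conceptually:
    //   *(const int **)this = <address of B's vbtable global>;  // vbptr at offset 0
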
@@ -583,12 +585,61 @@
CGM.EmitGlobal(GlobalDecl(D, Dtor_Base));
}
-llvm::Value *MicrosoftCXXABI::adjustThisArgumentForVirtualCall(
- CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) {
+CharUnits
+MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) {
GD = GD.getCanonicalDecl();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
- // FIXME: consider splitting the vdtor vs regular method code into two
- // functions.
+
+ GlobalDecl LookupGD = GD;
+ if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) {
+ // Complete destructors take a pointer to the complete object as a
+ // parameter, thus don't need this adjustment.
+ if (GD.getDtorType() == Dtor_Complete)
+ return CharUnits();
+
+    // There's no Dtor_Base entry in the vftable, but it shares the 'this'
+    // adjustment with the deleting destructor, so look that one up instead.
+ LookupGD = GlobalDecl(DD, Dtor_Deleting);
+ }
+
+ MicrosoftVTableContext::MethodVFTableLocation ML =
+ CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD);
+ CharUnits Adjustment = ML.VFPtrOffset;
+
+ // Normal virtual instance methods need to adjust from the vfptr that first
+ // defined the virtual method to the virtual base subobject, but destructors
+ // do not. The vector deleting destructor thunk applies this adjustment for
+ // us if necessary.
+ if (isa<CXXDestructorDecl>(MD))
+ Adjustment = CharUnits::Zero();
+
+ if (ML.VBase) {
+ const ASTRecordLayout &DerivedLayout =
+ CGM.getContext().getASTRecordLayout(MD->getParent());
+ Adjustment += DerivedLayout.getVBaseClassOffset(ML.VBase);
+ }
+
+ return Adjustment;
+}
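+
+// A hedged reading of what this helper returns (illustrative, not part of the
+// patch): in the Microsoft ABI a virtual method receives 'this' pointing at the
+// subobject whose vftable first declared it, so both the method prologue and
+// non-virtual calls to virtual methods must account for that displacement.
+//
+//   struct A { virtual void f(); };
+//   struct B { virtual void g(); };
+//   struct C : A, B { void g() override; };
+//
+// C::g lives in the vftable of C's B subobject, so it is entered with a pointer
+// to that subobject; the prologue adjustment for C::g is therefore the offset
+// of B within C (ML.VFPtrOffset), plus a virtual base offset when the declaring
+// vfptr sits in a virtual base. Destructors skip the vfptr part, as noted above.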
+
+llvm::Value *MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall(
+ CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This, bool VirtualCall) {
+ if (!VirtualCall) {
+ // If the call of a virtual function is not virtual, we just have to
+ // compensate for the adjustment the virtual function does in its prologue.
+ CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD);
+ if (Adjustment.isZero())
+ return This;
+
+ unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace();
+ llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS);
+ This = CGF.Builder.CreateBitCast(This, charPtrTy);
+ assert(Adjustment.isPositive());
+ return CGF.Builder.CreateConstGEP1_32(This, Adjustment.getQuantity());
+ }
+
+ GD = GD.getCanonicalDecl();
+ const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
GlobalDecl LookupGD = GD;
if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) {
@@ -607,51 +658,18 @@
unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace();
llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS);
CharUnits StaticOffset = ML.VFPtrOffset;
+
+ // Base destructors expect 'this' to point to the beginning of the base
+ // subobject, not the first vfptr that happens to contain the virtual dtor.
+ // However, we still need to apply the virtual base adjustment.
+ if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base)
+ StaticOffset = CharUnits::Zero();
+
if (ML.VBase) {
- bool AvoidVirtualOffset = false;
- if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base) {
- // A base destructor can only be called from a complete destructor of the
- // same record type or another destructor of a more derived type;
- // or a constructor of the same record type if an exception is thrown.
- assert(isa<CXXDestructorDecl>(CGF.CurGD.getDecl()) ||
- isa<CXXConstructorDecl>(CGF.CurGD.getDecl()));
- const CXXRecordDecl *CurRD =
- cast<CXXMethodDecl>(CGF.CurGD.getDecl())->getParent();
-
- if (MD->getParent() == CurRD) {
- if (isa<CXXDestructorDecl>(CGF.CurGD.getDecl()))
- assert(CGF.CurGD.getDtorType() == Dtor_Complete);
- if (isa<CXXConstructorDecl>(CGF.CurGD.getDecl()))
- assert(CGF.CurGD.getCtorType() == Ctor_Complete);
- // We're calling the main base dtor from a complete structor,
- // so we know the "this" offset statically.
- AvoidVirtualOffset = true;
- } else {
- // Let's see if we try to call a destructor of a non-virtual base.
- for (CXXRecordDecl::base_class_const_iterator I = CurRD->bases_begin(),
- E = CurRD->bases_end(); I != E; ++I) {
- if (I->getType()->getAsCXXRecordDecl() != MD->getParent())
- continue;
- // If we call a base destructor for a non-virtual base, we statically
- // know where it expects the vfptr and "this" to be.
- // The total offset should reflect the adjustment done by
- // adjustThisParameterInVirtualFunctionPrologue().
- AvoidVirtualOffset = true;
- break;
- }
- }
- }
-
- if (AvoidVirtualOffset) {
- const ASTRecordLayout &Layout =
- CGF.getContext().getASTRecordLayout(MD->getParent());
- StaticOffset += Layout.getVBaseClassOffset(ML.VBase);
- } else {
- This = CGF.Builder.CreateBitCast(This, charPtrTy);
- llvm::Value *VBaseOffset =
- GetVirtualBaseClassOffset(CGF, This, MD->getParent(), ML.VBase);
- This = CGF.Builder.CreateInBoundsGEP(This, VBaseOffset);
- }
+ This = CGF.Builder.CreateBitCast(This, charPtrTy);
+ llvm::Value *VBaseOffset =
+ GetVirtualBaseClassOffset(CGF, This, MD->getParent(), ML.VBase);
+ This = CGF.Builder.CreateInBoundsGEP(This, VBaseOffset);
}
if (!StaticOffset.isZero()) {
assert(StaticOffset.isPositive());
@@ -678,20 +696,25 @@
return false;
}
-void MicrosoftCXXABI::BuildInstanceFunctionParams(CodeGenFunction &CGF,
- QualType &ResTy,
- FunctionArgList &Params) {
- BuildThisParam(CGF, Params);
-
+void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
+ QualType &ResTy,
+ FunctionArgList &Params) {
ASTContext &Context = getContext();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
+ assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD));
if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) {
ImplicitParamDecl *IsMostDerived
= ImplicitParamDecl::Create(Context, 0,
CGF.CurGD.getDecl()->getLocation(),
&Context.Idents.get("is_most_derived"),
Context.IntTy);
- Params.push_back(IsMostDerived);
+ // The 'most_derived' parameter goes second if the ctor is variadic and last
+ // if it's not. Dtors can't be variadic.
+ const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
+ if (FPT->isVariadic())
+ Params.insert(Params.begin() + 1, IsMostDerived);
+ else
+ Params.push_back(IsMostDerived);
getStructorImplicitParamDecl(CGF) = IsMostDerived;
} else if (IsDeletingDtor(CGF.CurGD)) {
ImplicitParamDecl *ShouldDelete
@@ -706,36 +729,12 @@
llvm::Value *MicrosoftCXXABI::adjustThisParameterInVirtualFunctionPrologue(
CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) {
- GD = GD.getCanonicalDecl();
- const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
-
- GlobalDecl LookupGD = GD;
- if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) {
- // Complete destructors take a pointer to the complete object as a
- // parameter, thus don't need this adjustment.
- if (GD.getDtorType() == Dtor_Complete)
- return This;
-
- // There's no Dtor_Base in vftable but it shares the this adjustment with
- // the deleting one, so look it up instead.
- LookupGD = GlobalDecl(DD, Dtor_Deleting);
- }
-
// In this ABI, every virtual function takes a pointer to one of the
// subobjects that first defines it as the 'this' parameter, rather than a
- // pointer to ther final overrider subobject. Thus, we need to adjust it back
+ // pointer to the final overrider subobject. Thus, we need to adjust it back
// to the final overrider subobject before use.
// See comments in the MicrosoftVFTableContext implementation for the details.
-
- MicrosoftVTableContext::MethodVFTableLocation ML =
- CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD);
- CharUnits Adjustment = ML.VFPtrOffset;
- if (ML.VBase) {
- const ASTRecordLayout &DerivedLayout =
- CGF.getContext().getASTRecordLayout(MD->getParent());
- Adjustment += DerivedLayout.getVBaseClassOffset(ML.VBase);
- }
-
+ CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD);
if (Adjustment.isZero())
return This;
@@ -784,43 +783,63 @@
}
}
-void MicrosoftCXXABI::EmitConstructorCall(CodeGenFunction &CGF,
- const CXXConstructorDecl *D,
- CXXCtorType Type,
- bool ForVirtualBase,
- bool Delegating,
- llvm::Value *This,
- CallExpr::const_arg_iterator ArgBeg,
- CallExpr::const_arg_iterator ArgEnd) {
+unsigned MicrosoftCXXABI::addImplicitConstructorArgs(
+ CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
+ bool ForVirtualBase, bool Delegating, CallArgList &Args) {
assert(Type == Ctor_Complete || Type == Ctor_Base);
- llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(D, Ctor_Complete);
- llvm::Value *ImplicitParam = 0;
- QualType ImplicitParamTy;
- if (D->getParent()->getNumVBases()) {
- ImplicitParam = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete);
- ImplicitParamTy = getContext().IntTy;
+ // Check if we need a 'most_derived' parameter.
+ if (!D->getParent()->getNumVBases())
+ return 0;
+
+  // Add the 'most_derived' argument second if the constructor is variadic, or
+  // last if it is not.
+ const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
+ llvm::Value *MostDerivedArg =
+ llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete);
+ RValue RV = RValue::get(MostDerivedArg);
+ if (MostDerivedArg) {
+ if (FPT->isVariadic())
+ Args.insert(Args.begin() + 1,
+ CallArg(RV, getContext().IntTy, /*needscopy=*/false));
+ else
+ Args.add(RV, getContext().IntTy);
+ }
+
+ return 1; // Added one arg.
+}
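
Illustrative call sites (not part of the patch): the flag's value is what lets virtual bases be constructed exactly once.

    struct A {};
    struct B : virtual A { B(); };
    struct D : B { D(); };
    B b;   // complete-object call: passes is_most_derived = 1 (Ctor_Complete)
    // Inside D::D, the call to B::B passes is_most_derived = 0 (Ctor_Base), so
    // only the most-derived constructor ends up constructing the virtual A.
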
+
+void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
+ const CXXDestructorDecl *DD,
+ CXXDtorType Type, bool ForVirtualBase,
+ bool Delegating, llvm::Value *This) {
+ llvm::Value *Callee = CGM.GetAddrOfCXXDestructor(DD, Type);
+
+ if (DD->isVirtual()) {
+ assert(Type != CXXDtorType::Dtor_Deleting &&
+ "The deleting destructor should only be called via a virtual call");
+ This = adjustThisArgumentForVirtualFunctionCall(CGF, GlobalDecl(DD, Type),
+ This, false);
}
// FIXME: Provide a source location here.
- CGF.EmitCXXMemberCall(D, SourceLocation(), Callee, ReturnValueSlot(), This,
- ImplicitParam, ImplicitParamTy, ArgBeg, ArgEnd);
+ CGF.EmitCXXMemberCall(DD, SourceLocation(), Callee, ReturnValueSlot(), This,
+ /*ImplicitParam=*/0, /*ImplicitParamTy=*/QualType(), 0, 0);
}
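
Hedged note on the assertion above: deleting destructors are only reached through the vftable (see EmitVirtualDestructorCall below), so this path only ever emits complete or base destructor calls. For example:

    struct T { virtual ~T(); };
    void f(T *p) { delete p; }  // lowered as a virtual call to the deleting
                                // destructor in T's vftable
    void g()     { T t; }       // scope exit: a direct complete-destructor
                                // call, emitted by the function above
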
void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) {
MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext();
- MicrosoftVTableContext::VFPtrListTy VFPtrs = VFTContext.getVFPtrOffsets(RD);
+ VPtrInfoVector VFPtrs = VFTContext.getVFPtrOffsets(RD);
llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD);
- for (MicrosoftVTableContext::VFPtrListTy::iterator I = VFPtrs.begin(),
- E = VFPtrs.end(); I != E; ++I) {
- llvm::GlobalVariable *VTable = getAddrOfVTable(RD, I->VFPtrFullOffset);
+ for (VPtrInfoVector::iterator I = VFPtrs.begin(), E = VFPtrs.end(); I != E;
+ ++I) {
+ llvm::GlobalVariable *VTable = getAddrOfVTable(RD, (*I)->FullOffsetInMDC);
if (VTable->hasInitializer())
continue;
const VTableLayout &VTLayout =
- VFTContext.getVFTableLayout(RD, I->VFPtrFullOffset);
+ VFTContext.getVFTableLayout(RD, (*I)->FullOffsetInMDC);
llvm::Constant *Init = CGVT.CreateVTableInitializer(
RD, VTLayout.vtable_component_begin(),
VTLayout.getNumVTableComponents(), VTLayout.vtable_thunk_begin(),
@@ -828,7 +847,7 @@
VTable->setInitializer(Init);
VTable->setLinkage(Linkage);
- CGM.setTypeVisibility(VTable, RD, CodeGenModule::TVK_ForVTable);
+ CGM.setGlobalVisibility(VTable, RD);
}
}
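
Illustrative aside: unlike the Itanium ABI's single vtable per class, MSVC emits one vftable per vfptr in the layout, which is why this iterates over getVFPtrOffsets(RD). For example:

    struct A { virtual void f(); };
    struct B { virtual void g(); };
    struct C : A, B { void f() override; void g() override; };
    // C contains two vfptrs (one in the A subobject at offset 0, one in the B
    // subobject), so the loop above emits two vftable globals for C, keyed by
    // FullOffsetInMDC.
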
@@ -847,10 +866,10 @@
}
static void mangleVFTableName(MicrosoftMangleContext &MangleContext,
- const CXXRecordDecl *RD, const VFPtrInfo &VFPtr,
+ const CXXRecordDecl *RD, const VPtrInfo *VFPtr,
SmallString<256> &Name) {
llvm::raw_svector_ostream Out(Name);
- MangleContext.mangleCXXVFTable(RD, VFPtr.PathToMangle, Out);
+ MangleContext.mangleCXXVFTable(RD, VFPtr->MangledPath, Out);
}
llvm::Constant *MicrosoftCXXABI::getVTableAddressPointForConstExpr(
@@ -868,7 +887,7 @@
VFTableIdTy ID(RD, VPtrOffset);
VFTablesMapTy::iterator I;
bool Inserted;
- llvm::tie(I, Inserted) = VFTablesMap.insert(
+ std::tie(I, Inserted) = VFTablesMap.insert(
std::make_pair(ID, static_cast<llvm::GlobalVariable *>(0)));
if (!Inserted)
return I->second;
@@ -876,8 +895,7 @@
llvm::GlobalVariable *&VTable = I->second;
MicrosoftVTableContext &VTContext = CGM.getMicrosoftVTableContext();
- const MicrosoftVTableContext::VFPtrListTy &VFPtrs =
- VTContext.getVFPtrOffsets(RD);
+ const VPtrInfoVector &VFPtrs = VTContext.getVFPtrOffsets(RD);
if (DeferredVFTables.insert(RD)) {
// We haven't processed this record type before.
@@ -898,12 +916,12 @@
}
for (size_t J = 0, F = VFPtrs.size(); J != F; ++J) {
- if (VFPtrs[J].VFPtrFullOffset != VPtrOffset)
+ if (VFPtrs[J]->FullOffsetInMDC != VPtrOffset)
continue;
llvm::ArrayType *ArrayType = llvm::ArrayType::get(
CGM.Int8PtrTy,
- VTContext.getVFTableLayout(RD, VFPtrs[J].VFPtrFullOffset)
+ VTContext.getVFTableLayout(RD, VFPtrs[J]->FullOffsetInMDC)
.getNumVTableComponents());
SmallString<256> Name;
@@ -925,7 +943,8 @@
CGBuilderTy &Builder = CGF.Builder;
Ty = Ty->getPointerTo()->getPointerTo();
- llvm::Value *VPtr = adjustThisArgumentForVirtualCall(CGF, GD, This);
+ llvm::Value *VPtr =
+ adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
llvm::Value *VTable = CGF.GetVTablePtr(VPtr, Ty);
MicrosoftVTableContext::MethodVFTableLocation ML =
@@ -955,30 +974,47 @@
llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()),
DtorType == Dtor_Deleting);
- This = adjustThisArgumentForVirtualCall(CGF, GD, This);
+ This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
CGF.EmitCXXMemberCall(Dtor, CallLoc, Callee, ReturnValueSlot(), This,
ImplicitParam, Context.IntTy, 0, 0);
}
-const VBTableVector &
-MicrosoftCXXABI::EnumerateVBTables(const CXXRecordDecl *RD) {
+const VBTableGlobals &
+MicrosoftCXXABI::enumerateVBTables(const CXXRecordDecl *RD) {
// At this layer, we can key the cache off of a single class, which is much
- // easier than caching at the GlobalVariable layer.
- llvm::DenseMap<const CXXRecordDecl*, VBTableVector>::iterator I;
- bool added;
- llvm::tie(I, added) = VBTablesMap.insert(std::make_pair(RD, VBTableVector()));
- VBTableVector &VBTables = I->second;
- if (!added)
- return VBTables;
+ // easier than caching each vbtable individually.
+ llvm::DenseMap<const CXXRecordDecl*, VBTableGlobals>::iterator Entry;
+ bool Added;
+ std::tie(Entry, Added) =
+ VBTablesMap.insert(std::make_pair(RD, VBTableGlobals()));
+ VBTableGlobals &VBGlobals = Entry->second;
+ if (!Added)
+ return VBGlobals;
- VBTableBuilder(CGM, RD).enumerateVBTables(VBTables);
+ MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext();
+ VBGlobals.VBTables = &Context.enumerateVBTables(RD);
- return VBTables;
+ // Cache the globals for all vbtables so we don't have to recompute the
+ // mangled names.
+ llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD);
+ for (VPtrInfoVector::const_iterator I = VBGlobals.VBTables->begin(),
+ E = VBGlobals.VBTables->end();
+ I != E; ++I) {
+ VBGlobals.Globals.push_back(getAddrOfVBTable(**I, RD, Linkage));
+ }
+
+ return VBGlobals;
}
-llvm::Function *
-MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD,
- StringRef ThunkName) {
+llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk(
+ const CXXMethodDecl *MD,
+ const MicrosoftVTableContext::MethodVFTableLocation &ML) {
+ // Calculate the mangled name.
+ SmallString<256> ThunkName;
+ llvm::raw_svector_ostream Out(ThunkName);
+ getMangleContext().mangleVirtualMemPtrThunk(MD, Out);
+ Out.flush();
+
// If the thunk has been generated previously, just return it.
if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName))
return cast<llvm::Function>(GV);
@@ -1002,9 +1038,14 @@
CodeGenFunction CGF(CGM);
CGF.StartThunk(ThunkFn, MD, FnInfo);
- // Get to the Callee.
+  // Load the vfptr and then the callee from the vftable. The caller is
+  // expected to have adjusted 'this' (via the member pointer's non-virtual
+  // adjustment) so that the vfptr we need here is at offset zero.
llvm::Value *This = CGF.LoadCXXThis();
- llvm::Value *Callee = getVirtualFunctionPointer(CGF, MD, This, ThunkTy);
+ llvm::Value *VTable =
+ CGF.GetVTablePtr(This, ThunkTy->getPointerTo()->getPointerTo());
+ llvm::Value *VFuncPtr =
+ CGF.Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
+ llvm::Value *Callee = CGF.Builder.CreateLoad(VFuncPtr);
// Make the call and return the result.
CGF.EmitCallAndReturnForThunk(MD, Callee, 0);
@@ -1013,15 +1054,85 @@
}
void MicrosoftCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) {
- const VBTableVector &VBTables = EnumerateVBTables(RD);
- llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD);
-
- for (VBTableVector::const_iterator I = VBTables.begin(), E = VBTables.end();
- I != E; ++I) {
- I->EmitVBTableDefinition(CGM, RD, Linkage);
+ const VBTableGlobals &VBGlobals = enumerateVBTables(RD);
+ for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) {
+ const VPtrInfo *VBT = (*VBGlobals.VBTables)[I];
+ llvm::GlobalVariable *GV = VBGlobals.Globals[I];
+ emitVBTableDefinition(*VBT, RD, GV);
}
}
+llvm::GlobalVariable *
+MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
+ llvm::GlobalVariable::LinkageTypes Linkage) {
+ SmallString<256> OutName;
+ llvm::raw_svector_ostream Out(OutName);
+ MicrosoftMangleContext &Mangler =
+ cast<MicrosoftMangleContext>(CGM.getCXXABI().getMangleContext());
+ Mangler.mangleCXXVBTable(RD, VBT.MangledPath, Out);
+ Out.flush();
+ StringRef Name = OutName.str();
+
+ llvm::ArrayType *VBTableType =
+ llvm::ArrayType::get(CGM.IntTy, 1 + VBT.ReusingBase->getNumVBases());
+
+ assert(!CGM.getModule().getNamedGlobal(Name) &&
+ "vbtable with this name already exists: mangling bug?");
+ llvm::GlobalVariable *GV =
+ CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, Linkage);
+ GV->setUnnamedAddr(true);
+ return GV;
+}
+
+void MicrosoftCXXABI::emitVBTableDefinition(const VPtrInfo &VBT,
+ const CXXRecordDecl *RD,
+ llvm::GlobalVariable *GV) const {
+ const CXXRecordDecl *ReusingBase = VBT.ReusingBase;
+
+ assert(RD->getNumVBases() && ReusingBase->getNumVBases() &&
+ "should only emit vbtables for classes with vbtables");
+
+ const ASTRecordLayout &BaseLayout =
+ CGM.getContext().getASTRecordLayout(VBT.BaseWithVPtr);
+ const ASTRecordLayout &DerivedLayout =
+ CGM.getContext().getASTRecordLayout(RD);
+
+ SmallVector<llvm::Constant *, 4> Offsets(1 + ReusingBase->getNumVBases(), 0);
+
+ // The offset from ReusingBase's vbptr to itself always leads.
+ CharUnits VBPtrOffset = BaseLayout.getVBPtrOffset();
+ Offsets[0] = llvm::ConstantInt::get(CGM.IntTy, -VBPtrOffset.getQuantity());
+
+ MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext();
+ for (const auto &I : ReusingBase->vbases()) {
+ const CXXRecordDecl *VBase = I.getType()->getAsCXXRecordDecl();
+ CharUnits Offset = DerivedLayout.getVBaseClassOffset(VBase);
+ assert(!Offset.isNegative());
+
+ // Make it relative to the subobject vbptr.
+ CharUnits CompleteVBPtrOffset = VBT.NonVirtualOffset + VBPtrOffset;
+ if (VBT.getVBaseWithVPtr())
+ CompleteVBPtrOffset +=
+ DerivedLayout.getVBaseClassOffset(VBT.getVBaseWithVPtr());
+ Offset -= CompleteVBPtrOffset;
+
+ unsigned VBIndex = Context.getVBTableIndex(ReusingBase, VBase);
+ assert(Offsets[VBIndex] == 0 && "The same vbindex seen twice?");
+ Offsets[VBIndex] = llvm::ConstantInt::get(CGM.IntTy, Offset.getQuantity());
+ }
+
+ assert(Offsets.size() ==
+ cast<llvm::ArrayType>(cast<llvm::PointerType>(GV->getType())
+ ->getElementType())->getNumElements());
+ llvm::ArrayType *VBTableType =
+ llvm::ArrayType::get(CGM.IntTy, Offsets.size());
+ llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets);
+ GV->setInitializer(Init);
+
+ // Set the right visibility.
+ CGM.setGlobalVisibility(GV, RD);
+}
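+
+// A rough worked example of the initializer built here (assuming a typical
+// 32-bit MSVC layout; the exact offsets are illustrative): entry 0 holds the
+// negated offset of the vbptr within the subobject that provides it, and every
+// later entry is the distance from that vbptr to one virtual base, indexed by
+// getVBTableIndex().
+//
+//   struct A { int a; };
+//   struct B : virtual A { int b; };
+//   // B's layout (32-bit): [vbptr][int b][A]  -> vbptr at 0, b at 4, A at 8
+//   // B's vbtable initializer: { 0, 8 }
+//   //   [0] = -(offset of the vbptr)       = 0
+//   //   [1] = offset from the vbptr to A   = 8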
+
llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF,
llvm::Value *This,
const ThisAdjustment &TA) {
@@ -1168,7 +1279,7 @@
if (D.isExternallyVisible()) {
// Externally visible variables have to be numbered in Sema to properly
// handle unreachable VarDecls.
- BitIndex = getContext().getManglingNumber(&D);
+ BitIndex = getContext().getStaticLocalNumber(&D);
assert(BitIndex > 0);
BitIndex--;
} else {
@@ -1229,38 +1340,6 @@
CGF.EmitBlock(EndBlock);
}
-// Member pointer helpers.
-static bool hasVBPtrOffsetField(MSInheritanceModel Inheritance) {
- return Inheritance == MSIM_Unspecified;
-}
-
-static bool hasOnlyOneField(bool IsMemberFunction,
- MSInheritanceModel Inheritance) {
- return Inheritance <= MSIM_SinglePolymorphic ||
- (!IsMemberFunction && Inheritance <= MSIM_MultiplePolymorphic);
-}
-
-// Only member pointers to functions need a this adjustment, since it can be
-// combined with the field offset for data pointers.
-static bool hasNonVirtualBaseAdjustmentField(bool IsMemberFunction,
- MSInheritanceModel Inheritance) {
- return (IsMemberFunction && Inheritance >= MSIM_Multiple);
-}
-
-static bool hasVirtualBaseAdjustmentField(MSInheritanceModel Inheritance) {
- return Inheritance >= MSIM_Virtual;
-}
-
-// Use zero for the field offset of a null data member pointer if we can
-// guarantee that zero is not a valid field offset, or if the member pointer has
-// multiple fields. Polymorphic classes have a vfptr at offset zero, so we can
-// use zero for null. If there are multiple fields, we can use zero even if it
-// is a valid field offset because null-ness testing will check the other
-// fields.
-static bool nullFieldOffsetIsZero(MSInheritanceModel Inheritance) {
- return Inheritance != MSIM_Multiple && Inheritance != MSIM_Single;
-}
-
bool MicrosoftCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
// Null-ness for function memptrs only depends on the first field, which is
// the function pointer. The rest don't matter, so we can zero initialize.
@@ -1270,28 +1349,28 @@
// The virtual base adjustment field is always -1 for null, so if we have one
// we can't zero initialize. The field offset is sometimes also -1 if 0 is a
// valid field offset.
- const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl();
- MSInheritanceModel Inheritance = RD->getMSInheritanceModel();
- return (!hasVirtualBaseAdjustmentField(Inheritance) &&
- nullFieldOffsetIsZero(Inheritance));
+ const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
+ MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
+ return (!MSInheritanceAttr::hasVBTableOffsetField(Inheritance) &&
+ RD->nullFieldOffsetIsZero());
}
llvm::Type *
MicrosoftCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
- const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl();
- MSInheritanceModel Inheritance = RD->getMSInheritanceModel();
+ const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
+ MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
llvm::SmallVector<llvm::Type *, 4> fields;
if (MPT->isMemberFunctionPointer())
fields.push_back(CGM.VoidPtrTy); // FunctionPointerOrVirtualThunk
else
fields.push_back(CGM.IntTy); // FieldOffset
- if (hasNonVirtualBaseAdjustmentField(MPT->isMemberFunctionPointer(),
- Inheritance))
+ if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(),
+ Inheritance))
fields.push_back(CGM.IntTy);
- if (hasVBPtrOffsetField(Inheritance))
+ if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
fields.push_back(CGM.IntTy);
- if (hasVirtualBaseAdjustmentField(Inheritance))
+ if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
fields.push_back(CGM.IntTy); // VirtualBaseAdjustmentOffset
if (fields.size() == 1)
@@ -1303,24 +1382,24 @@
GetNullMemberPointerFields(const MemberPointerType *MPT,
llvm::SmallVectorImpl<llvm::Constant *> &fields) {
assert(fields.empty());
- const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl();
- MSInheritanceModel Inheritance = RD->getMSInheritanceModel();
+ const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
+ MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
if (MPT->isMemberFunctionPointer()) {
// FunctionPointerOrVirtualThunk
fields.push_back(llvm::Constant::getNullValue(CGM.VoidPtrTy));
} else {
- if (nullFieldOffsetIsZero(Inheritance))
+ if (RD->nullFieldOffsetIsZero())
fields.push_back(getZeroInt()); // FieldOffset
else
fields.push_back(getAllOnesInt()); // FieldOffset
}
- if (hasNonVirtualBaseAdjustmentField(MPT->isMemberFunctionPointer(),
- Inheritance))
+ if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(),
+ Inheritance))
fields.push_back(getZeroInt());
- if (hasVBPtrOffsetField(Inheritance))
+ if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
fields.push_back(getZeroInt());
- if (hasVirtualBaseAdjustmentField(Inheritance))
+ if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
fields.push_back(getAllOnesInt());
}
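
For orientation, a non-normative summary of the field sets that this function and ConvertMemberPointerType above agree on, keyed by the record's inheritance model:

    // single:       data = { FieldOffset }            func = { FnPtr }
    // multiple:     data = { FieldOffset }            func = { FnPtr, NVOffset }
    // virtual:      data = { FieldOffset, VBTableOffset }
    //               func = { FnPtr, NVOffset, VBTableOffset }
    // unspecified:  adds a VBPtrOffset field just before VBTableOffset
    //
    // The null value is all zeros, except that a data FieldOffset becomes -1
    // when zero could be a real field offset, and VBTableOffset (when present)
    // is always -1.
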
@@ -1341,29 +1420,29 @@
const CXXRecordDecl *RD,
CharUnits NonVirtualBaseAdjustment)
{
- MSInheritanceModel Inheritance = RD->getMSInheritanceModel();
+ MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
   // Single inheritance class member pointers are represented as scalars instead
// of aggregates.
- if (hasOnlyOneField(IsMemberFunction, Inheritance))
+ if (MSInheritanceAttr::hasOnlyOneField(IsMemberFunction, Inheritance))
return FirstField;
llvm::SmallVector<llvm::Constant *, 4> fields;
fields.push_back(FirstField);
- if (hasNonVirtualBaseAdjustmentField(IsMemberFunction, Inheritance))
+ if (MSInheritanceAttr::hasNVOffsetField(IsMemberFunction, Inheritance))
fields.push_back(llvm::ConstantInt::get(
CGM.IntTy, NonVirtualBaseAdjustment.getQuantity()));
- if (hasVBPtrOffsetField(Inheritance)) {
+ if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) {
CharUnits Offs = CharUnits::Zero();
if (RD->getNumVBases())
- Offs = GetVBPtrOffsetFromBases(RD);
+ Offs = getContext().getASTRecordLayout(RD).getVBPtrOffset();
fields.push_back(llvm::ConstantInt::get(CGM.IntTy, Offs.getQuantity()));
}
// The rest of the fields are adjusted by conversions to a more derived class.
- if (hasVirtualBaseAdjustmentField(Inheritance))
+ if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
fields.push_back(getZeroInt());
return llvm::ConstantStruct::getAnon(fields);
@@ -1372,7 +1451,7 @@
llvm::Constant *
MicrosoftCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) {
- const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl();
+ const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
llvm::Constant *FirstField =
llvm::ConstantInt::get(CGM.IntTy, offset.getQuantity());
return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/false, RD,
@@ -1395,8 +1474,8 @@
// FIXME PR15713: Support virtual inheritance paths.
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MPD))
- return BuildMemberPointer(MPT->getClass()->getAsCXXRecordDecl(),
- MD, ThisAdjustment);
+ return BuildMemberPointer(MPT->getMostRecentCXXRecordDecl(), MD,
+ ThisAdjustment);
CharUnits FieldOffset =
getContext().toCharUnitsFromBits(getContext().getFieldOffset(MPD));
@@ -1409,6 +1488,7 @@
CharUnits NonVirtualBaseAdjustment) {
assert(MD->isInstance() && "Member function must not be static!");
MD = MD->getCanonicalDecl();
+ RD = RD->getMostRecentDecl();
CodeGenTypes &Types = CGM.getTypes();
llvm::Constant *FirstField;
@@ -1442,16 +1522,10 @@
"member function in virtual base class");
FirstField = llvm::Constant::getNullValue(CGM.VoidPtrTy);
} else {
- SmallString<256> ThunkName;
- CharUnits PointerWidth = getContext().toCharUnitsFromBits(
- getContext().getTargetInfo().getPointerWidth(0));
- uint64_t OffsetInVFTable = ML.Index * PointerWidth.getQuantity();
- llvm::raw_svector_ostream Out(ThunkName);
- getMangleContext().mangleVirtualMemPtrThunk(MD, OffsetInVFTable, Out);
- Out.flush();
-
- llvm::Function *Thunk = EmitVirtualMemPtrThunk(MD, ThunkName.str());
+ llvm::Function *Thunk = EmitVirtualMemPtrThunk(MD, ML);
FirstField = llvm::ConstantExpr::getBitCast(Thunk, CGM.VoidPtrTy);
+ // Include the vfptr adjustment if the method is in a non-primary vftable.
+ NonVirtualBaseAdjustment += ML.VFPtrOffset;
}
}
@@ -1486,9 +1560,10 @@
// If this is a single field member pointer (single inheritance), this is a
// single icmp.
- const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl();
- MSInheritanceModel Inheritance = RD->getMSInheritanceModel();
- if (hasOnlyOneField(MPT->isMemberFunctionPointer(), Inheritance))
+ const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
+ MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
+ if (MSInheritanceAttr::hasOnlyOneField(MPT->isMemberFunctionPointer(),
+ Inheritance))
return Builder.CreateICmp(Eq, L, R);
// Compare the first field.
@@ -1607,11 +1682,9 @@
// Returns an adjusted base cast to i8*, since we do more address arithmetic on
// it.
-llvm::Value *
-MicrosoftCXXABI::AdjustVirtualBase(CodeGenFunction &CGF,
- const CXXRecordDecl *RD, llvm::Value *Base,
- llvm::Value *VBTableOffset,
- llvm::Value *VBPtrOffset) {
+llvm::Value *MicrosoftCXXABI::AdjustVirtualBase(
+ CodeGenFunction &CGF, const Expr *E, const CXXRecordDecl *RD,
+ llvm::Value *Base, llvm::Value *VBTableOffset, llvm::Value *VBPtrOffset) {
CGBuilderTy &Builder = CGF.Builder;
Base = Builder.CreateBitCast(Base, CGM.Int8PtrTy);
llvm::BasicBlock *OriginalBB = 0;
@@ -1637,9 +1710,15 @@
// know the vbptr offset.
if (!VBPtrOffset) {
CharUnits offs = CharUnits::Zero();
- if (RD->getNumVBases()) {
- offs = GetVBPtrOffsetFromBases(RD);
- }
+ if (!RD->hasDefinition()) {
+ DiagnosticsEngine &Diags = CGF.CGM.getDiags();
+ unsigned DiagID = Diags.getCustomDiagID(
+ DiagnosticsEngine::Error,
+ "member pointer representation requires a "
+ "complete class type for %0 to perform this expression");
+ Diags.Report(E->getExprLoc(), DiagID) << RD << E->getSourceRange();
+ } else if (RD->getNumVBases())
+ offs = getContext().getASTRecordLayout(RD).getVBPtrOffset();
VBPtrOffset = llvm::ConstantInt::get(CGM.IntTy, offs.getQuantity());
}
llvm::Value *VBPtr = 0;
@@ -1659,18 +1738,16 @@
return AdjustedBase;
}
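
A hedged, plain-C rendering of the adjustment AdjustVirtualBase performs (names mirror the values above; the real code emits the equivalent loads and GEPs):

    // char      *VBPtrAddr = (char *)Base + VBPtrOffset;    // address of the vbptr
    // const int *VBTable   = *(const int **)VBPtrAddr;      // load the vbtable
    // int        VBaseOffs = *(const int *)((const char *)VBTable + VBTableOffset);
    // AdjustedBase = VBPtrAddr + VBaseOffs;   // vbtable entries are relative
    //                                         // to the vbptr itself
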
-llvm::Value *
-MicrosoftCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF,
- llvm::Value *Base,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT) {
+llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress(
+ CodeGenFunction &CGF, const Expr *E, llvm::Value *Base, llvm::Value *MemPtr,
+ const MemberPointerType *MPT) {
assert(MPT->isMemberDataPointer());
unsigned AS = Base->getType()->getPointerAddressSpace();
llvm::Type *PType =
CGF.ConvertTypeForMem(MPT->getPointeeType())->getPointerTo(AS);
CGBuilderTy &Builder = CGF.Builder;
- const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl();
- MSInheritanceModel Inheritance = RD->getMSInheritanceModel();
+ const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
+ MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
// Extract the fields we need, regardless of model. We'll apply them if we
// have them.
@@ -1681,16 +1758,21 @@
// We need to extract values.
unsigned I = 0;
FieldOffset = Builder.CreateExtractValue(MemPtr, I++);
- if (hasVBPtrOffsetField(Inheritance))
+ if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++);
- if (hasVirtualBaseAdjustmentField(Inheritance))
+ if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++);
}
if (VirtualBaseAdjustmentOffset) {
- Base = AdjustVirtualBase(CGF, RD, Base, VirtualBaseAdjustmentOffset,
+ Base = AdjustVirtualBase(CGF, E, RD, Base, VirtualBaseAdjustmentOffset,
VBPtrOffset);
}
+
+ // Cast to char*.
+ Base = Builder.CreateBitCast(Base, Builder.getInt8Ty()->getPointerTo(AS));
+
+  // Apply the field offset; at this point we can assume the member pointer
+  // itself is non-null.
llvm::Value *Addr =
Builder.CreateInBoundsGEP(Base, FieldOffset, "memptr.offset");
@@ -1699,9 +1781,9 @@
return Builder.CreateBitCast(Addr, PType);
}
-static MSInheritanceModel
+static MSInheritanceAttr::Spelling
getInheritanceFromMemptr(const MemberPointerType *MPT) {
- return MPT->getClass()->getAsCXXRecordDecl()->getMSInheritanceModel();
+ return MPT->getMostRecentCXXRecordDecl()->getMSInheritanceModel();
}
llvm::Value *
@@ -1721,15 +1803,17 @@
const MemberPointerType *SrcTy =
E->getSubExpr()->getType()->castAs<MemberPointerType>();
const MemberPointerType *DstTy = E->getType()->castAs<MemberPointerType>();
- MSInheritanceModel SrcInheritance = getInheritanceFromMemptr(SrcTy);
- MSInheritanceModel DstInheritance = getInheritanceFromMemptr(DstTy);
bool IsFunc = SrcTy->isMemberFunctionPointer();
// If the classes use the same null representation, reinterpret_cast is a nop.
bool IsReinterpret = E->getCastKind() == CK_ReinterpretMemberPointer;
- if (IsReinterpret && (IsFunc ||
- nullFieldOffsetIsZero(SrcInheritance) ==
- nullFieldOffsetIsZero(DstInheritance)))
+ if (IsReinterpret && IsFunc)
+ return Src;
+
+ CXXRecordDecl *SrcRD = SrcTy->getMostRecentCXXRecordDecl();
+ CXXRecordDecl *DstRD = DstTy->getMostRecentCXXRecordDecl();
+ if (IsReinterpret &&
+ SrcRD->nullFieldOffsetIsZero() == DstRD->nullFieldOffsetIsZero())
return Src;
CGBuilderTy &Builder = CGF.Builder;
@@ -1758,15 +1842,16 @@
llvm::Value *NonVirtualBaseAdjustment = 0;
llvm::Value *VirtualBaseAdjustmentOffset = 0;
llvm::Value *VBPtrOffset = 0;
- if (!hasOnlyOneField(IsFunc, SrcInheritance)) {
+ MSInheritanceAttr::Spelling SrcInheritance = SrcRD->getMSInheritanceModel();
+ if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) {
// We need to extract values.
unsigned I = 0;
FirstField = Builder.CreateExtractValue(Src, I++);
- if (hasNonVirtualBaseAdjustmentField(IsFunc, SrcInheritance))
+ if (MSInheritanceAttr::hasNVOffsetField(IsFunc, SrcInheritance))
NonVirtualBaseAdjustment = Builder.CreateExtractValue(Src, I++);
- if (hasVBPtrOffsetField(SrcInheritance))
+ if (MSInheritanceAttr::hasVBPtrOffsetField(SrcInheritance))
VBPtrOffset = Builder.CreateExtractValue(Src, I++);
- if (hasVirtualBaseAdjustmentField(SrcInheritance))
+ if (MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance))
VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(Src, I++);
}
@@ -1788,20 +1873,21 @@
// FIXME PR15713: Support conversions through virtually derived classes.
// Recompose dst from the null struct and the adjusted fields from src.
+ MSInheritanceAttr::Spelling DstInheritance = DstRD->getMSInheritanceModel();
llvm::Value *Dst;
- if (hasOnlyOneField(IsFunc, DstInheritance)) {
+ if (MSInheritanceAttr::hasOnlyOneField(IsFunc, DstInheritance)) {
Dst = FirstField;
} else {
Dst = llvm::UndefValue::get(DstNull->getType());
unsigned Idx = 0;
Dst = Builder.CreateInsertValue(Dst, FirstField, Idx++);
- if (hasNonVirtualBaseAdjustmentField(IsFunc, DstInheritance))
+ if (MSInheritanceAttr::hasNVOffsetField(IsFunc, DstInheritance))
Dst = Builder.CreateInsertValue(
Dst, getValueOrZeroInt(NonVirtualBaseAdjustment), Idx++);
- if (hasVBPtrOffsetField(DstInheritance))
+ if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance))
Dst = Builder.CreateInsertValue(
Dst, getValueOrZeroInt(VBPtrOffset), Idx++);
- if (hasVirtualBaseAdjustmentField(DstInheritance))
+ if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance))
Dst = Builder.CreateInsertValue(
Dst, getValueOrZeroInt(VirtualBaseAdjustmentOffset), Idx++);
}
@@ -1833,8 +1919,8 @@
if (E->getCastKind() == CK_ReinterpretMemberPointer)
return Src;
- MSInheritanceModel SrcInheritance = getInheritanceFromMemptr(SrcTy);
- MSInheritanceModel DstInheritance = getInheritanceFromMemptr(DstTy);
+ MSInheritanceAttr::Spelling SrcInheritance = getInheritanceFromMemptr(SrcTy);
+ MSInheritanceAttr::Spelling DstInheritance = getInheritanceFromMemptr(DstTy);
// Decompose src.
llvm::Constant *FirstField = Src;
@@ -1842,15 +1928,15 @@
llvm::Constant *VirtualBaseAdjustmentOffset = 0;
llvm::Constant *VBPtrOffset = 0;
bool IsFunc = SrcTy->isMemberFunctionPointer();
- if (!hasOnlyOneField(IsFunc, SrcInheritance)) {
+ if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) {
// We need to extract values.
unsigned I = 0;
FirstField = Src->getAggregateElement(I++);
- if (hasNonVirtualBaseAdjustmentField(IsFunc, SrcInheritance))
+ if (MSInheritanceAttr::hasNVOffsetField(IsFunc, SrcInheritance))
NonVirtualBaseAdjustment = Src->getAggregateElement(I++);
- if (hasVBPtrOffsetField(SrcInheritance))
+ if (MSInheritanceAttr::hasVBPtrOffsetField(SrcInheritance))
VBPtrOffset = Src->getAggregateElement(I++);
- if (hasVirtualBaseAdjustmentField(SrcInheritance))
+ if (MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance))
VirtualBaseAdjustmentOffset = Src->getAggregateElement(I++);
}
@@ -1873,35 +1959,33 @@
// FIXME PR15713: Support conversions through virtually derived classes.
// Recompose dst from the null struct and the adjusted fields from src.
- if (hasOnlyOneField(IsFunc, DstInheritance))
+ if (MSInheritanceAttr::hasOnlyOneField(IsFunc, DstInheritance))
return FirstField;
llvm::SmallVector<llvm::Constant *, 4> Fields;
Fields.push_back(FirstField);
- if (hasNonVirtualBaseAdjustmentField(IsFunc, DstInheritance))
+ if (MSInheritanceAttr::hasNVOffsetField(IsFunc, DstInheritance))
Fields.push_back(getConstantOrZeroInt(NonVirtualBaseAdjustment));
- if (hasVBPtrOffsetField(DstInheritance))
+ if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance))
Fields.push_back(getConstantOrZeroInt(VBPtrOffset));
- if (hasVirtualBaseAdjustmentField(DstInheritance))
+ if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance))
Fields.push_back(getConstantOrZeroInt(VirtualBaseAdjustmentOffset));
return llvm::ConstantStruct::getAnon(Fields);
}
-llvm::Value *
-MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF,
- llvm::Value *&This,
- llvm::Value *MemPtr,
- const MemberPointerType *MPT) {
+llvm::Value *MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer(
+ CodeGenFunction &CGF, const Expr *E, llvm::Value *&This,
+ llvm::Value *MemPtr, const MemberPointerType *MPT) {
assert(MPT->isMemberFunctionPointer());
const FunctionProtoType *FPT =
MPT->getPointeeType()->castAs<FunctionProtoType>();
- const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl();
+ const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
llvm::FunctionType *FTy =
CGM.getTypes().GetFunctionType(
CGM.getTypes().arrangeCXXMethodType(RD, FPT));
CGBuilderTy &Builder = CGF.Builder;
- MSInheritanceModel Inheritance = RD->getMSInheritanceModel();
+ MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
// Extract the fields we need, regardless of model. We'll apply them if we
// have them.
@@ -1913,16 +1997,16 @@
// We need to extract values.
unsigned I = 0;
FunctionPointer = Builder.CreateExtractValue(MemPtr, I++);
- if (hasNonVirtualBaseAdjustmentField(MPT, Inheritance))
+ if (MSInheritanceAttr::hasNVOffsetField(MPT, Inheritance))
NonVirtualBaseAdjustment = Builder.CreateExtractValue(MemPtr, I++);
- if (hasVBPtrOffsetField(Inheritance))
+ if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++);
- if (hasVirtualBaseAdjustmentField(Inheritance))
+ if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++);
}
if (VirtualBaseAdjustmentOffset) {
- This = AdjustVirtualBase(CGF, RD, This, VirtualBaseAdjustmentOffset,
+ This = AdjustVirtualBase(CGF, E, RD, This, VirtualBaseAdjustmentOffset,
VBPtrOffset);
}
diff --git a/lib/CodeGen/MicrosoftVBTables.cpp b/lib/CodeGen/MicrosoftVBTables.cpp
deleted file mode 100644
index dabf52c..0000000
--- a/lib/CodeGen/MicrosoftVBTables.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-//===--- MicrosoftVBTables.cpp - Virtual Base Table Emission --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class generates data about MSVC virtual base tables.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MicrosoftVBTables.h"
-#include "CodeGenModule.h"
-#include "CGCXXABI.h"
-
-namespace clang {
-namespace CodeGen {
-
-/// Holds intermediate data about a path to a vbptr inside a base subobject.
-struct VBTablePath {
- VBTablePath(const VBTableInfo &VBInfo)
- : VBInfo(VBInfo), NextBase(VBInfo.VBPtrSubobject.getBase()) { }
-
- /// All the data needed to build a vbtable, minus the GlobalVariable whose
- /// name we haven't computed yet.
- VBTableInfo VBInfo;
-
- /// Next base to use for disambiguation. Can be null if we've already
- /// disambiguated this path once.
- const CXXRecordDecl *NextBase;
-
- /// Path is not really a full path like a CXXBasePath. It holds the subset of
- /// records that need to be mangled into the vbtable symbol name in order to get
- /// a unique name.
- llvm::SmallVector<const CXXRecordDecl *, 1> Path;
-};
-
-VBTableBuilder::VBTableBuilder(CodeGenModule &CGM,
- const CXXRecordDecl *MostDerived)
- : CGM(CGM), MostDerived(MostDerived),
- DerivedLayout(CGM.getContext().getASTRecordLayout(MostDerived)) {}
-
-void VBTableBuilder::enumerateVBTables(VBTableVector &VBTables) {
- VBTablePathVector Paths;
- findUnambiguousPaths(MostDerived, BaseSubobject(MostDerived,
- CharUnits::Zero()), Paths);
- for (VBTablePathVector::iterator I = Paths.begin(), E = Paths.end();
- I != E; ++I) {
- VBTablePath *P = *I;
- P->VBInfo.GV = getAddrOfVBTable(P->VBInfo.ReusingBase, P->Path);
- VBTables.push_back(P->VBInfo);
- }
-}
-
-
-void VBTableBuilder::findUnambiguousPaths(const CXXRecordDecl *ReusingBase,
- BaseSubobject CurSubobject,
- VBTablePathVector &Paths) {
- size_t PathsStart = Paths.size();
- bool ReuseVBPtrFromBase = true;
- const CXXRecordDecl *CurBase = CurSubobject.getBase();
- const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(CurBase);
-
- // If this base has a vbptr, then we've found a path. These are not full
- // paths, so we don't use CXXBasePath.
- if (Layout.hasOwnVBPtr()) {
- ReuseVBPtrFromBase = false;
- VBTablePath *Info = new VBTablePath(
- VBTableInfo(ReusingBase, CurSubobject, /*GV=*/0));
- Paths.push_back(Info);
- }
-
- // Recurse onto any bases which themselves have virtual bases.
- for (CXXRecordDecl::base_class_const_iterator I = CurBase->bases_begin(),
- E = CurBase->bases_end(); I != E; ++I) {
- const CXXRecordDecl *Base = I->getType()->getAsCXXRecordDecl();
- if (!Base->getNumVBases())
- continue; // Bases without virtual bases have no vbptrs.
- CharUnits NextOffset;
- const CXXRecordDecl *NextReusingBase = Base;
- if (I->isVirtual()) {
- if (!VBasesSeen.insert(Base))
- continue; // Don't visit virtual bases twice.
- NextOffset = DerivedLayout.getVBaseClassOffset(Base);
- } else {
- NextOffset = (CurSubobject.getBaseOffset() +
- Layout.getBaseClassOffset(Base));
-
- // If CurBase didn't have a vbptr, then ReusingBase will reuse the vbptr
- // from the first non-virtual base with vbases for its vbptr.
- if (ReuseVBPtrFromBase) {
- NextReusingBase = ReusingBase;
- ReuseVBPtrFromBase = false;
- }
- }
-
- size_t NumPaths = Paths.size();
- findUnambiguousPaths(NextReusingBase, BaseSubobject(Base, NextOffset),
- Paths);
-
- // Tag paths through this base with the base itself. We might use it to
- // disambiguate.
- for (size_t I = NumPaths, E = Paths.size(); I != E; ++I)
- Paths[I]->NextBase = Base;
- }
-
- bool AmbiguousPaths = rebucketPaths(Paths, PathsStart);
- if (AmbiguousPaths)
- rebucketPaths(Paths, PathsStart, /*SecondPass=*/true);
-
-#ifndef NDEBUG
- // Check that the paths are in fact unique.
- for (size_t I = PathsStart + 1, E = Paths.size(); I != E; ++I) {
- assert(Paths[I]->Path != Paths[I - 1]->Path && "vbtable paths are not unique");
- }
-#endif
-}
-
-static bool pathCompare(VBTablePath *LHS, VBTablePath *RHS) {
- return LHS->Path < RHS->Path;
-}
-
-void VBTableBuilder::extendPath(VBTablePath *P, bool SecondPass) {
- assert(P->NextBase || SecondPass);
- if (P->NextBase) {
- P->Path.push_back(P->NextBase);
- P->NextBase = 0; // Prevent the path from being extended twice.
- }
-}
-
-bool VBTableBuilder::rebucketPaths(VBTablePathVector &Paths, size_t PathsStart,
- bool SecondPass) {
- // What we're essentially doing here is bucketing together ambiguous paths.
- // Any bucket with more than one path in it gets extended by NextBase, which
-  // is usually the direct base that inherited the vbptr. This code uses a
- // sorted vector to implement a multiset to form the buckets. Note that the
- // ordering is based on pointers, but it doesn't change our output order. The
- // current algorithm is designed to match MSVC 2012's names.
- // TODO: Implement MSVC 2010 or earlier names to avoid extra vbtable cruft.
- VBTablePathVector PathsSorted(&Paths[PathsStart], &Paths.back() + 1);
- std::sort(PathsSorted.begin(), PathsSorted.end(), pathCompare);
- bool AmbiguousPaths = false;
- for (size_t I = 0, E = PathsSorted.size(); I != E;) {
- // Scan forward to find the end of the bucket.
- size_t BucketStart = I;
- do {
- ++I;
- } while (I != E && PathsSorted[BucketStart]->Path == PathsSorted[I]->Path);
-
- // If this bucket has multiple paths, extend them all.
- if (I - BucketStart > 1) {
- AmbiguousPaths = true;
- for (size_t II = BucketStart; II != I; ++II)
- extendPath(PathsSorted[II], SecondPass);
- }
- }
- return AmbiguousPaths;
-}
-
-llvm::GlobalVariable *
-VBTableBuilder::getAddrOfVBTable(const CXXRecordDecl *ReusingBase,
- ArrayRef<const CXXRecordDecl *> BasePath) {
- // Caching at this layer is redundant with the caching in EnumerateVBTables().
-
- SmallString<256> OutName;
- llvm::raw_svector_ostream Out(OutName);
- MicrosoftMangleContext &Mangler =
- cast<MicrosoftMangleContext>(CGM.getCXXABI().getMangleContext());
- Mangler.mangleCXXVBTable(MostDerived, BasePath, Out);
- Out.flush();
- StringRef Name = OutName.str();
-
- llvm::ArrayType *VBTableType =
- llvm::ArrayType::get(CGM.IntTy, 1 + ReusingBase->getNumVBases());
-
- assert(!CGM.getModule().getNamedGlobal(Name) &&
- "vbtable with this name already exists: mangling bug?");
- llvm::GlobalVariable *VBTable =
- CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType,
- llvm::GlobalValue::ExternalLinkage);
- VBTable->setUnnamedAddr(true);
- return VBTable;
-}
-
-void VBTableInfo::EmitVBTableDefinition(
- CodeGenModule &CGM, const CXXRecordDecl *RD,
- llvm::GlobalVariable::LinkageTypes Linkage) const {
- assert(RD->getNumVBases() && ReusingBase->getNumVBases() &&
- "should only emit vbtables for classes with vbtables");
-
- const ASTRecordLayout &BaseLayout =
- CGM.getContext().getASTRecordLayout(VBPtrSubobject.getBase());
- const ASTRecordLayout &DerivedLayout =
- CGM.getContext().getASTRecordLayout(RD);
-
- SmallVector<llvm::Constant *, 4> Offsets(1 + ReusingBase->getNumVBases(), 0);
-
- // The offset from ReusingBase's vbptr to itself always leads.
- CharUnits VBPtrOffset = BaseLayout.getVBPtrOffset();
- Offsets[0] = llvm::ConstantInt::get(CGM.IntTy, -VBPtrOffset.getQuantity());
-
- MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext();
- for (CXXRecordDecl::base_class_const_iterator I = ReusingBase->vbases_begin(),
- E = ReusingBase->vbases_end(); I != E; ++I) {
- const CXXRecordDecl *VBase = I->getType()->getAsCXXRecordDecl();
- CharUnits Offset = DerivedLayout.getVBaseClassOffset(VBase);
- assert(!Offset.isNegative());
- // Make it relative to the subobject vbptr.
- Offset -= VBPtrSubobject.getBaseOffset() + VBPtrOffset;
- unsigned VBIndex = Context.getVBTableIndex(ReusingBase, VBase);
- assert(Offsets[VBIndex] == 0 && "The same vbindex seen twice?");
- Offsets[VBIndex] = llvm::ConstantInt::get(CGM.IntTy, Offset.getQuantity());
- }
-
- assert(Offsets.size() ==
- cast<llvm::ArrayType>(cast<llvm::PointerType>(GV->getType())
- ->getElementType())->getNumElements());
- llvm::ArrayType *VBTableType =
- llvm::ArrayType::get(CGM.IntTy, Offsets.size());
- llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets);
- GV->setInitializer(Init);
-
- // Set the correct linkage.
- GV->setLinkage(Linkage);
-
- // Set the right visibility.
- CGM.setTypeVisibility(GV, RD, CodeGenModule::TVK_ForVTable);
-}
-
-} // namespace CodeGen
-} // namespace clang
diff --git a/lib/CodeGen/MicrosoftVBTables.h b/lib/CodeGen/MicrosoftVBTables.h
deleted file mode 100644
index 4ad8e07..0000000
--- a/lib/CodeGen/MicrosoftVBTables.h
+++ /dev/null
@@ -1,129 +0,0 @@
-//===--- MicrosoftVBTables.h - Virtual Base Table Emission ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class generates data about MSVC virtual base tables.
-//
-//===----------------------------------------------------------------------===//
-
-#include "clang/AST/BaseSubobject.h"
-#include "clang/Basic/LLVM.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/IR/GlobalVariable.h"
-#include <vector>
-
-namespace clang {
-
-class ASTRecordLayout;
-
-namespace CodeGen {
-
-class CodeGenModule;
-
-struct VBTableInfo {
- VBTableInfo(const CXXRecordDecl *ReusingBase, BaseSubobject VBPtrSubobject,
- llvm::GlobalVariable *GV)
- : ReusingBase(ReusingBase), VBPtrSubobject(VBPtrSubobject), GV(GV) { }
-
- /// The vbtable will hold all of the virtual bases of ReusingBase. This may
- /// or may not be the same class as VBPtrSubobject.Base. A derived class will
- /// reuse the vbptr of the first non-virtual base subobject that has one.
- const CXXRecordDecl *ReusingBase;
-
- /// The vbptr is stored inside this subobject.
- BaseSubobject VBPtrSubobject;
-
- /// The GlobalVariable for this vbtable.
- llvm::GlobalVariable *GV;
-
- /// \brief Emits a definition for GV by setting it's initializer.
- void EmitVBTableDefinition(CodeGenModule &CGM, const CXXRecordDecl *RD,
- llvm::GlobalVariable::LinkageTypes Linkage) const;
-};
-
-// These are embedded in a DenseMap and the elements are large, so we don't want
-// SmallVector.
-typedef std::vector<VBTableInfo> VBTableVector;
-
-struct VBTablePath;
-
-typedef llvm::SmallVector<VBTablePath *, 6> VBTablePathVector;
-
-/// Produces MSVC-compatible vbtable data. The symbols produced by this builder
-/// match those produced by MSVC 2012, which is different from MSVC 2010.
-///
-/// Unlike Itanium, which uses only one vtable per class, MSVC uses a different
-/// symbol for every "address point" installed in base subobjects. As a result,
-/// we have to compute unique symbols for every table. Since there can be
-/// multiple non-virtual base subobjects of the same class, combining the most
-/// derived class with the base containing the vtable is insufficient. The most
-/// trivial algorithm would be to mangle in the entire path from base to most
-/// derived, but that would be too easy and would create unnecessarily large
-/// symbols. ;)
-///
-/// MSVC 2012 appears to minimize the vbtable names using the following
-/// algorithm. First, walk the class hierarchy in the usual order, depth first,
-/// left to right, to find all of the subobjects which contain a vbptr field.
-/// Visiting each class node yields a list of inheritance paths to vbptrs. Each
-/// record with a vbptr creates an initially empty path.
-///
-/// To combine paths from child nodes, the paths are compared to check for
-/// ambiguity. Paths are "ambiguous" if multiple paths have the same set of
-/// components in the same order. Each group of ambiguous paths is extended by
-/// appending the class of the base from which it came. If the current class
-/// node produced an ambiguous path, its path is extended with the current class.
-/// After extending paths, MSVC again checks for ambiguity, and extends any
-/// ambiguous path which wasn't already extended. Because each node yields an
-/// unambiguous set of paths, MSVC doesn't need to extend any path more than once
-/// to produce an unambiguous set of paths.
-///
-/// The VBTableBuilder class attempts to implement this algorithm by repeatedly
-/// bucketing paths together by sorting them.
-///
-/// TODO: Presumably vftables use the same algorithm.
-///
-/// TODO: Implement the MSVC 2010 name mangling scheme to avoid emitting
-/// duplicate vbtables with different symbols.
-class VBTableBuilder {
-public:
- VBTableBuilder(CodeGenModule &CGM, const CXXRecordDecl *MostDerived);
-
- void enumerateVBTables(VBTableVector &VBTables);
-
-private:
- bool hasVBPtr(const CXXRecordDecl *RD);
-
- llvm::GlobalVariable *getAddrOfVBTable(const CXXRecordDecl *ReusingBase,
- ArrayRef<const CXXRecordDecl *> BasePath);
-
- /// Enumerates paths to bases with vbptrs. The paths elements are compressed
- /// to contain only the classes necessary to form an unambiguous path.
- void findUnambiguousPaths(const CXXRecordDecl *ReusingBase,
- BaseSubobject CurSubobject,
- VBTablePathVector &Paths);
-
- void extendPath(VBTablePath *Info, bool SecondPass);
-
- bool rebucketPaths(VBTablePathVector &Paths, size_t PathsStart,
- bool SecondPass = false);
-
- CodeGenModule &CGM;
-
- const CXXRecordDecl *MostDerived;
-
- /// Caches the layout of the most derived class.
- const ASTRecordLayout &DerivedLayout;
-
- /// Set of vbases to avoid re-visiting the same vbases.
- llvm::SmallPtrSet<const CXXRecordDecl*, 4> VBasesSeen;
-};
-
-} // namespace CodeGen
-
-} // namespace clang
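
The header removed above documents MSVC 2012's vbtable naming scheme: paths to vbptrs are sorted into buckets, and every bucket that still contains more than one identical path is extended by another base-class name until all paths are unique. The sketch below is illustrative only and is not part of this patch: it restates that bucket-and-extend step over plain strings, and the per-path NextBase values stand in for the "next base to append" that the real builder derives from the class hierarchy.

#include <algorithm>
#include <string>
#include <vector>

typedef std::vector<std::string> Path;

// Sorts the paths, groups identical ones into buckets, and extends every path
// in a bucket of size > 1 by appending that path's next base-class name.
// Returns true if any bucket was ambiguous, mirroring the removed
// rebucketPaths().
static bool rebucketPathsSketch(std::vector<Path> &Paths,
                                const std::vector<std::string> &NextBase) {
  std::vector<size_t> Order(Paths.size());
  for (size_t I = 0; I != Order.size(); ++I)
    Order[I] = I;
  std::sort(Order.begin(), Order.end(),
            [&](size_t A, size_t B) { return Paths[A] < Paths[B]; });

  bool AmbiguousPaths = false;
  for (size_t I = 0, E = Order.size(); I != E;) {
    // Scan forward to find the end of the bucket of identical paths.
    size_t BucketStart = I;
    do
      ++I;
    while (I != E && Paths[Order[BucketStart]] == Paths[Order[I]]);

    // If this bucket has multiple paths, extend them all.
    if (I - BucketStart > 1) {
      AmbiguousPaths = true;
      for (size_t J = BucketStart; J != I; ++J)
        Paths[Order[J]].push_back(NextBase[Order[J]]);
    }
  }
  return AmbiguousPaths;
}

int main() {
  // Two identical (empty) paths for two different vbptr subobjects get
  // extended by their distinct next bases "A" and "B" and become unambiguous.
  std::vector<Path> Paths(2);
  std::vector<std::string> NextBase = {"A", "B"};
  while (rebucketPathsSketch(Paths, NextBase)) {
  }
  return 0;
}

Repeating the call until it returns false corresponds to the re-checking for ambiguity that the removed header comment describes.
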
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index bc7acbc..7873f44 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -12,30 +12,31 @@
//===----------------------------------------------------------------------===//
#include "clang/CodeGen/ModuleBuilder.h"
-#include "CodeGenModule.h"
#include "CGDebugInfo.h"
+#include "CodeGenModule.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Frontend/CodeGenOptions.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include <memory>
using namespace clang;
namespace {
class CodeGeneratorImpl : public CodeGenerator {
DiagnosticsEngine &Diags;
- OwningPtr<const llvm::DataLayout> TD;
+ std::unique_ptr<const llvm::DataLayout> TD;
ASTContext *Ctx;
const CodeGenOptions CodeGenOpts; // Intentionally copied in.
protected:
- OwningPtr<llvm::Module> M;
- OwningPtr<CodeGen::CodeGenModule> Builder;
+ std::unique_ptr<llvm::Module> M;
+ std::unique_ptr<CodeGen::CodeGenModule> Builder;
+
public:
CodeGeneratorImpl(DiagnosticsEngine &diags, const std::string& ModuleName,
const CodeGenOptions &CGO, llvm::LLVMContext& C)
@@ -44,15 +45,13 @@
virtual ~CodeGeneratorImpl() {}
- virtual llvm::Module* GetModule() {
+ llvm::Module* GetModule() override {
return M.get();
}
- virtual llvm::Module* ReleaseModule() {
- return M.take();
- }
+ llvm::Module *ReleaseModule() override { return M.release(); }
- virtual void Initialize(ASTContext &Context) {
+ void Initialize(ASTContext &Context) override {
Ctx = &Context;
M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple());
@@ -65,14 +64,14 @@
HandleDependentLibrary(CodeGenOpts.DependentLibraries[i]);
}
- virtual void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {
+ void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override {
if (Diags.hasErrorOccurred())
return;
Builder->HandleCXXStaticMemberVarInstantiation(VD);
}
- virtual bool HandleTopLevelDecl(DeclGroupRef DG) {
+ bool HandleTopLevelDecl(DeclGroupRef DG) override {
if (Diags.hasErrorOccurred())
return true;
@@ -86,7 +85,7 @@
/// to (e.g. struct, union, enum, class) is completed. This allows the
/// client hack on the type, which can occur at any point in the file
/// (because these can be defined in declspecs).
- virtual void HandleTagDeclDefinition(TagDecl *D) {
+ void HandleTagDeclDefinition(TagDecl *D) override {
if (Diags.hasErrorOccurred())
return;
@@ -95,10 +94,8 @@
// In C++, we may have member functions that need to be emitted at this
// point.
if (Ctx->getLangOpts().CPlusPlus && !D->isDependentContext()) {
- for (DeclContext::decl_iterator M = D->decls_begin(),
- MEnd = D->decls_end();
- M != MEnd; ++M)
- if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(*M))
+ for (auto *M : D->decls())
+ if (auto *Method = dyn_cast<CXXMethodDecl>(M))
if (Method->doesThisDeclarationHaveABody() &&
(Method->hasAttr<UsedAttr>() ||
Method->hasAttr<ConstructorAttr>()))
@@ -106,7 +103,7 @@
}
}
- virtual void HandleTagDeclRequiredDefinition(const TagDecl *D) LLVM_OVERRIDE {
+ void HandleTagDeclRequiredDefinition(const TagDecl *D) override {
if (Diags.hasErrorOccurred())
return;
@@ -115,8 +112,10 @@
DI->completeRequiredType(RD);
}
- virtual void HandleTranslationUnit(ASTContext &Ctx) {
+ void HandleTranslationUnit(ASTContext &Ctx) override {
if (Diags.hasErrorOccurred()) {
+ if (Builder)
+ Builder->clear();
M.reset();
return;
}
@@ -125,30 +124,30 @@
Builder->Release();
}
- virtual void CompleteTentativeDefinition(VarDecl *D) {
+ void CompleteTentativeDefinition(VarDecl *D) override {
if (Diags.hasErrorOccurred())
return;
Builder->EmitTentativeDefinition(D);
}
- virtual void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) {
+ void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) override {
if (Diags.hasErrorOccurred())
return;
Builder->EmitVTable(RD, DefinitionRequired);
}
- virtual void HandleLinkerOptionPragma(llvm::StringRef Opts) {
+ void HandleLinkerOptionPragma(llvm::StringRef Opts) override {
Builder->AppendLinkerOptions(Opts);
}
- virtual void HandleDetectMismatch(llvm::StringRef Name,
- llvm::StringRef Value) {
+ void HandleDetectMismatch(llvm::StringRef Name,
+ llvm::StringRef Value) override {
Builder->AddDetectMismatch(Name, Value);
}
- virtual void HandleDependentLibrary(llvm::StringRef Lib) {
+ void HandleDependentLibrary(llvm::StringRef Lib) override {
Builder->AddDependentLib(Lib);
}
};
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 7446e67..c602b1f 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -114,6 +114,9 @@
case Ignore:
OS << "Ignore";
break;
+ case InAlloca:
+ OS << "InAlloca Offset=" << getInAllocaFieldIndex();
+ break;
case Indirect:
OS << "Indirect Align=" << getIndirectAlign()
<< " ByVal=" << getIndirectByVal()
@@ -206,14 +209,12 @@
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
- for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(),
- e = CXXRD->bases_end(); i != e; ++i)
- if (!isEmptyRecord(Context, i->getType(), true))
+ for (const auto &I : CXXRD->bases())
+ if (!isEmptyRecord(Context, I.getType(), true))
return false;
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i)
- if (!isEmptyField(Context, *i, AllowArrays))
+ for (const auto *I : RD->fields())
+ if (!isEmptyField(Context, I, AllowArrays))
return false;
return true;
}
@@ -239,10 +240,9 @@
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
- for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(),
- e = CXXRD->bases_end(); i != e; ++i) {
+ for (const auto &I : CXXRD->bases()) {
// Ignore empty records.
- if (isEmptyRecord(Context, i->getType(), true))
+ if (isEmptyRecord(Context, I.getType(), true))
continue;
// If we already found an element then this isn't a single-element struct.
@@ -251,16 +251,14 @@
// If this is non-empty and not a single element struct, the composite
// cannot be a single element struct.
- Found = isSingleElementStruct(i->getType(), Context);
+ Found = isSingleElementStruct(I.getType(), Context);
if (!Found)
return 0;
}
}
// Check for single element.
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
- const FieldDecl *FD = *i;
+ for (const auto *FD : RD->fields()) {
QualType FT = FD->getType();
// Ignore empty fields.
@@ -336,10 +334,7 @@
uint64_t Size = 0;
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
- const FieldDecl *FD = *i;
-
+ for (const auto *FD : RD->fields()) {
if (!is32Or64BitBasicType(FD->getType(), Context))
return false;
@@ -371,15 +366,14 @@
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
- virtual void computeInfo(CGFunctionInfo &FI) const {
+ void computeInfo(CGFunctionInfo &FI) const override {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it)
- it->info = classifyArgumentType(it->type);
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
}
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -395,7 +389,7 @@
ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
if (isAggregateTypeForABI(Ty)) {
- // Records with non trivial destructors/constructors should not be passed
+ // Records with non-trivial destructors/constructors should not be passed
// by value.
if (isRecordReturnIndirect(Ty, getCXXABI()))
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
@@ -440,9 +434,9 @@
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
- virtual void computeInfo(CGFunctionInfo &FI) const;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -454,9 +448,8 @@
void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it)
- it->info = classifyArgumentType(it->type);
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
}
llvm::Value *PNaClABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -526,6 +519,16 @@
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//
+/// \brief Similar to llvm::CCState, but for Clang.
+struct CCState {
+ CCState(unsigned CC) : CC(CC), FreeRegs(0) {}
+
+ unsigned CC;
+ unsigned FreeRegs;
+ unsigned StackOffset;
+ bool UseInAlloca;
+};
+
/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public ABIInfo {
enum Class {
@@ -544,30 +547,37 @@
return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
}
- static bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context,
- unsigned callingConvention);
+ bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context,
+ bool IsInstanceMethod) const;
/// getIndirectResult - Give a source type \arg Ty, return a suitable result
/// such that the argument will be passed in memory.
- ABIArgInfo getIndirectResult(QualType Ty, bool ByVal,
- unsigned &FreeRegs) const;
+ ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
+
+ ABIArgInfo getIndirectReturnResult(CCState &State) const;
/// \brief Return the alignment to use for the given type on the stack.
unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
Class classify(QualType Ty) const;
- ABIArgInfo classifyReturnType(QualType RetTy,
- unsigned callingConvention) const;
- ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &FreeRegs,
- bool IsFastCall) const;
- bool shouldUseInReg(QualType Ty, unsigned &FreeRegs,
- bool IsFastCall, bool &NeedsPadding) const;
+ ABIArgInfo classifyReturnType(QualType RetTy, CCState &State,
+ bool IsInstanceMethod) const;
+ ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
+ bool shouldUseInReg(QualType Ty, CCState &State, bool &NeedsPadding) const;
+
+ /// \brief Rewrite the function info so that all memory arguments use
+ /// inalloca.
+ void rewriteWithInAlloca(CGFunctionInfo &FI) const;
+
+ void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
+ unsigned &StackOffset, ABIArgInfo &Info,
+ QualType Type) const;
public:
- virtual void computeInfo(CGFunctionInfo &FI) const;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool w,
unsigned r)
@@ -585,24 +595,25 @@
const llvm::Triple &Triple, const CodeGenOptions &Opts);
void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const;
+ CodeGen::CodeGenModule &CGM) const override;
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
// Darwin uses different dwarf register numbers for EH.
if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
return 4;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const;
+ llvm::Value *Address) const override;
llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
- llvm::Type* Ty) const {
+ llvm::Type* Ty) const override {
return X86AdjustInlineAsmType(CGF, Constraint, Ty);
}
- llvm::Constant *getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const {
+ llvm::Constant *
+ getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
unsigned Sig = (0xeb << 0) | // jmp rel8
(0x06 << 8) | // .+0x08
('F' << 16) |
@@ -616,9 +627,8 @@
/// shouldReturnTypeInRegister - Determine if the given type should be
/// passed in a register (for the Darwin ABI).
-bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
- ASTContext &Context,
- unsigned callingConvention) {
+bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, ASTContext &Context,
+ bool IsInstanceMethod) const {
uint64_t Size = Context.getTypeSize(Ty);
// Type must be register sized.
@@ -644,7 +654,7 @@
// Arrays are treated like records.
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
return shouldReturnTypeInRegister(AT->getElementType(), Context,
- callingConvention);
+ IsInstanceMethod);
// Otherwise, it must be a record type.
const RecordType *RT = Ty->getAs<RecordType>();
@@ -654,31 +664,35 @@
// For thiscall conventions, structures will never be returned in
// a register. This is for compatibility with the MSVC ABI
- if (callingConvention == llvm::CallingConv::X86_ThisCall &&
- RT->isStructureType()) {
+ if (IsWin32StructABI && IsInstanceMethod && RT->isStructureType())
return false;
- }
// Structure types are passed in register if all fields would be
// passed in a register.
- for (RecordDecl::field_iterator i = RT->getDecl()->field_begin(),
- e = RT->getDecl()->field_end(); i != e; ++i) {
- const FieldDecl *FD = *i;
-
+ for (const auto *FD : RT->getDecl()->fields()) {
// Empty fields are ignored.
if (isEmptyField(Context, FD, true))
continue;
// Check fields recursively.
- if (!shouldReturnTypeInRegister(FD->getType(), Context,
- callingConvention))
+ if (!shouldReturnTypeInRegister(FD->getType(), Context, IsInstanceMethod))
return false;
}
return true;
}
-ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
- unsigned callingConvention) const {
+ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(CCState &State) const {
+ // If the return value is indirect, then the hidden argument is consuming one
+ // integer register.
+ if (State.FreeRegs) {
+ --State.FreeRegs;
+ return ABIArgInfo::getIndirectInReg(/*Align=*/0, /*ByVal=*/false);
+ }
+ return ABIArgInfo::getIndirect(/*Align=*/0, /*ByVal=*/false);
+}
+
+ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, CCState &State,
+ bool IsInstanceMethod) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
@@ -701,7 +715,7 @@
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
Size));
- return ABIArgInfo::getIndirect(0);
+ return getIndirectReturnResult(State);
}
return ABIArgInfo::getDirect();
@@ -710,21 +724,20 @@
if (isAggregateTypeForABI(RetTy)) {
if (const RecordType *RT = RetTy->getAs<RecordType>()) {
if (isRecordReturnIndirect(RT, getCXXABI()))
- return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+ return getIndirectReturnResult(State);
// Structures with flexible arrays are always indirect.
if (RT->getDecl()->hasFlexibleArrayMember())
- return ABIArgInfo::getIndirect(0);
+ return getIndirectReturnResult(State);
}
// If specified, structs and unions are always indirect.
if (!IsSmallStructInRegABI && !RetTy->isAnyComplexType())
- return ABIArgInfo::getIndirect(0);
+ return getIndirectReturnResult(State);
// Small structures which are register sized are generally returned
// in a register.
- if (X86_32ABIInfo::shouldReturnTypeInRegister(RetTy, getContext(),
- callingConvention)) {
+ if (shouldReturnTypeInRegister(RetTy, getContext(), IsInstanceMethod)) {
uint64_t Size = getContext().getTypeSize(RetTy);
// As a special-case, if the struct is a "single-element" struct, and
@@ -742,7 +755,7 @@
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size));
}
- return ABIArgInfo::getIndirect(0);
+ return getIndirectReturnResult(State);
}
// Treat an enum type as its underlying type.
@@ -765,13 +778,11 @@
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
- for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(),
- e = CXXRD->bases_end(); i != e; ++i)
- if (!isRecordWithSSEVectorType(Context, i->getType()))
+ for (const auto &I : CXXRD->bases())
+ if (!isRecordWithSSEVectorType(Context, I.getType()))
return false;
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
+ for (const auto *i : RD->fields()) {
QualType FT = i->getType();
if (isSSEVectorType(Context, FT))
@@ -806,10 +817,10 @@
}
ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
- unsigned &FreeRegs) const {
+ CCState &State) const {
if (!ByVal) {
- if (FreeRegs) {
- --FreeRegs; // Non byval indirects just use one pointer.
+ if (State.FreeRegs) {
+ --State.FreeRegs; // Non-byval indirects just use one pointer.
return ABIArgInfo::getIndirectInReg(0, false);
}
return ABIArgInfo::getIndirect(0, false);
@@ -819,15 +830,12 @@
unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
if (StackAlign == 0)
- return ABIArgInfo::getIndirect(4);
+ return ABIArgInfo::getIndirect(4, /*ByVal=*/true);
// If the stack alignment is less than the type alignment, realign the
// argument.
- if (StackAlign < TypeAlign)
- return ABIArgInfo::getIndirect(StackAlign, /*ByVal=*/true,
- /*Realign=*/true);
-
- return ABIArgInfo::getIndirect(StackAlign);
+ bool Realign = TypeAlign > StackAlign;
+ return ABIArgInfo::getIndirect(StackAlign, /*ByVal=*/true, Realign);
}
X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
@@ -843,8 +851,8 @@
return Integer;
}
-bool X86_32ABIInfo::shouldUseInReg(QualType Ty, unsigned &FreeRegs,
- bool IsFastCall, bool &NeedsPadding) const {
+bool X86_32ABIInfo::shouldUseInReg(QualType Ty, CCState &State,
+ bool &NeedsPadding) const {
NeedsPadding = false;
Class C = classify(Ty);
if (C == Float)
@@ -856,14 +864,14 @@
if (SizeInRegs == 0)
return false;
- if (SizeInRegs > FreeRegs) {
- FreeRegs = 0;
+ if (SizeInRegs > State.FreeRegs) {
+ State.FreeRegs = 0;
return false;
}
- FreeRegs -= SizeInRegs;
+ State.FreeRegs -= SizeInRegs;
- if (IsFastCall) {
+ if (State.CC == llvm::CallingConv::X86_FastCall) {
if (Size > 32)
return false;
@@ -876,7 +884,7 @@
if (Ty->isReferenceType())
return true;
- if (FreeRegs)
+ if (State.FreeRegs)
NeedsPadding = true;
return false;
@@ -886,20 +894,26 @@
}
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
- unsigned &FreeRegs,
- bool IsFastCall) const {
+ CCState &State) const {
// FIXME: Set alignment on indirect arguments.
if (isAggregateTypeForABI(Ty)) {
if (const RecordType *RT = Ty->getAs<RecordType>()) {
- if (IsWin32StructABI)
- return getIndirectResult(Ty, true, FreeRegs);
+ // Check with the C++ ABI first.
+ CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
+ if (RAA == CGCXXABI::RAA_Indirect) {
+ return getIndirectResult(Ty, false, State);
+ } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
+ // The field index doesn't matter, we'll fix it up later.
+ return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
+ }
- if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
- return getIndirectResult(Ty, RAA == CGCXXABI::RAA_DirectInMemory, FreeRegs);
+ // Structs are always byval on win32, regardless of what they contain.
+ if (IsWin32StructABI)
+ return getIndirectResult(Ty, true, State);
// Structures with flexible arrays are always indirect.
if (RT->getDecl()->hasFlexibleArrayMember())
- return getIndirectResult(Ty, true, FreeRegs);
+ return getIndirectResult(Ty, true, State);
}
// Ignore empty structs/unions.
@@ -909,7 +923,7 @@
llvm::LLVMContext &LLVMContext = getVMContext();
llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
bool NeedsPadding;
- if (shouldUseInReg(Ty, FreeRegs, IsFastCall, NeedsPadding)) {
+ if (shouldUseInReg(Ty, State, NeedsPadding)) {
unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
@@ -923,9 +937,10 @@
// optimizations.
if (getContext().getTypeSize(Ty) <= 4*32 &&
canExpandIndirectArgument(Ty, getContext()))
- return ABIArgInfo::getExpandWithPadding(IsFastCall, PaddingType);
+ return ABIArgInfo::getExpandWithPadding(
+ State.CC == llvm::CallingConv::X86_FastCall, PaddingType);
- return getIndirectResult(Ty, true, FreeRegs);
+ return getIndirectResult(Ty, true, State);
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
@@ -950,7 +965,7 @@
Ty = EnumTy->getDecl()->getIntegerType();
bool NeedsPadding;
- bool InReg = shouldUseInReg(Ty, FreeRegs, IsFastCall, NeedsPadding);
+ bool InReg = shouldUseInReg(Ty, State, NeedsPadding);
if (Ty->isPromotableIntegerType()) {
if (InReg)
@@ -963,32 +978,107 @@
}
void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
- FI.getReturnInfo() = classifyReturnType(FI.getReturnType(),
- FI.getCallingConvention());
-
- unsigned CC = FI.getCallingConvention();
- bool IsFastCall = CC == llvm::CallingConv::X86_FastCall;
- unsigned FreeRegs;
- if (IsFastCall)
- FreeRegs = 2;
+ CCState State(FI.getCallingConvention());
+ if (State.CC == llvm::CallingConv::X86_FastCall)
+ State.FreeRegs = 2;
else if (FI.getHasRegParm())
- FreeRegs = FI.getRegParm();
+ State.FreeRegs = FI.getRegParm();
else
- FreeRegs = DefaultNumRegisterParameters;
+ State.FreeRegs = DefaultNumRegisterParameters;
- // If the return value is indirect, then the hidden argument is consuming one
- // integer register.
- if (FI.getReturnInfo().isIndirect() && FreeRegs) {
- --FreeRegs;
- ABIArgInfo &Old = FI.getReturnInfo();
- Old = ABIArgInfo::getIndirectInReg(Old.getIndirectAlign(),
- Old.getIndirectByVal(),
- Old.getIndirectRealign());
+ FI.getReturnInfo() =
+ classifyReturnType(FI.getReturnType(), State, FI.isInstanceMethod());
+
+ // On win32, use the x86_cdeclmethodcc convention for cdecl methods that use
+ // sret. This convention swaps the order of the first two parameters behind
+ // the scenes to match MSVC.
+ if (IsWin32StructABI && FI.isInstanceMethod() &&
+ FI.getCallingConvention() == llvm::CallingConv::C &&
+ FI.getReturnInfo().isIndirect())
+ FI.setEffectiveCallingConvention(llvm::CallingConv::X86_CDeclMethod);
+
+ bool UsedInAlloca = false;
+ for (auto &I : FI.arguments()) {
+ I.info = classifyArgumentType(I.type, State);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
}
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it)
- it->info = classifyArgumentType(it->type, FreeRegs, IsFastCall);
+ // If we needed to use inalloca for any argument, do a second pass and rewrite
+ // all the memory arguments to use inalloca.
+ if (UsedInAlloca)
+ rewriteWithInAlloca(FI);
+}
+
+void
+X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
+ unsigned &StackOffset,
+ ABIArgInfo &Info, QualType Type) const {
+ // Insert padding bytes to respect alignment. For x86_32, each argument is 4
+ // byte aligned.
+ unsigned Align = 4U;
+ if (Info.getKind() == ABIArgInfo::Indirect && Info.getIndirectByVal())
+ Align = std::max(Align, Info.getIndirectAlign());
+ if (StackOffset & (Align - 1)) {
+ unsigned OldOffset = StackOffset;
+ StackOffset = llvm::RoundUpToAlignment(StackOffset, Align);
+ unsigned NumBytes = StackOffset - OldOffset;
+ assert(NumBytes);
+ llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
+ Ty = llvm::ArrayType::get(Ty, NumBytes);
+ FrameFields.push_back(Ty);
+ }
+
+ Info = ABIArgInfo::getInAlloca(FrameFields.size());
+ FrameFields.push_back(CGT.ConvertTypeForMem(Type));
+ StackOffset += getContext().getTypeSizeInChars(Type).getQuantity();
+}
+
+void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
+ assert(IsWin32StructABI && "inalloca only supported on win32");
+
+ // Build a packed struct type for all of the arguments in memory.
+ SmallVector<llvm::Type *, 6> FrameFields;
+
+ unsigned StackOffset = 0;
+
+ // Put the sret parameter into the inalloca struct if it's in memory.
+ ABIArgInfo &Ret = FI.getReturnInfo();
+ if (Ret.isIndirect() && !Ret.getInReg()) {
+ CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
+ addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
+ // On Windows, the hidden sret parameter is always returned in eax.
+ Ret.setInAllocaSRet(IsWin32StructABI);
+ }
+
+ // Skip the 'this' parameter in ecx.
+ CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();
+ if (FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall)
+ ++I;
+
+ // Put arguments passed in memory into the struct.
+ for (; I != E; ++I) {
+
+ // Leave ignored and inreg arguments alone.
+ switch (I->info.getKind()) {
+ case ABIArgInfo::Indirect:
+ assert(I->info.getIndirectByVal());
+ break;
+ case ABIArgInfo::Ignore:
+ continue;
+ case ABIArgInfo::Direct:
+ case ABIArgInfo::Extend:
+ if (I->info.getInReg())
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
+ }
+
+ FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
+ /*isPacked=*/true));
}
llvm::Value *X86_32ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
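
The addFieldToArgStruct/rewriteWithInAlloca pair above lays every memory argument out in one packed struct, rounding the running offset up to each argument's alignment and recording the gap as an [N x i8] padding field. The following standalone sketch only illustrates that bookkeeping; FrameField, roundUp and addFieldSketch are made-up names, and the 1- and 8-byte arguments are example values.

#include <cstdio>
#include <vector>

struct FrameField { unsigned Offset; unsigned Size; bool IsPadding; };

// Rounds Value up to a power-of-two Align, like llvm::RoundUpToAlignment.
static unsigned roundUp(unsigned Value, unsigned Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

// Adds one memory argument of the given size/alignment to the packed frame,
// inserting an explicit padding field when the current offset is misaligned.
static void addFieldSketch(std::vector<FrameField> &Fields,
                           unsigned &StackOffset, unsigned Size,
                           unsigned Align) {
  unsigned Aligned = roundUp(StackOffset, Align);
  if (Aligned != StackOffset)
    Fields.push_back({StackOffset, Aligned - StackOffset, /*IsPadding=*/true});
  Fields.push_back({Aligned, Size, /*IsPadding=*/false});
  StackOffset = Aligned + Size;
}

int main() {
  std::vector<FrameField> Fields;
  unsigned StackOffset = 0;
  addFieldSketch(Fields, StackOffset, 1, 4);  // e.g. a char argument
  addFieldSketch(Fields, StackOffset, 8, 4);  // e.g. an 8-byte byval struct
  for (const FrameField &F : Fields)
    std::printf("%-7s offset=%u size=%u\n",
                F.IsPadding ? "padding" : "field", F.Offset, F.Size);
  return 0;
}

For these example inputs the sketch reports a field at offset 0, a 3-byte padding field at offset 1, and a field at offset 4, which matches the [3 x i8] entry the code above would add before the next 4-byte-aligned argument.
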
@@ -1219,10 +1309,10 @@
return false;
}
- virtual void computeInfo(CGFunctionInfo &FI) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
@@ -1233,10 +1323,10 @@
public:
WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
- virtual void computeInfo(CGFunctionInfo &FI) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -1248,12 +1338,12 @@
return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
}
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 7;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const {
+ llvm::Value *Address) const override {
llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
// 0-15 are the 16 integer registers.
@@ -1264,12 +1354,12 @@
llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
- llvm::Type* Ty) const {
+ llvm::Type* Ty) const override {
return X86AdjustInlineAsmType(CGF, Constraint, Ty);
}
bool isNoProtoCallVariadic(const CallArgList &args,
- const FunctionNoProtoType *fnType) const {
+ const FunctionNoProtoType *fnType) const override {
// The default CC on x86-64 sets %al to the number of SSA
// registers used, and GCC sets this when calling an unprototyped
// function, so we override the default behavior. However, don't do
@@ -1293,7 +1383,8 @@
return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
}
- llvm::Constant *getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const {
+ llvm::Constant *
+ getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
unsigned Sig = (0xeb << 0) | // jmp rel8
(0x0a << 8) | // .+0x0c
('F' << 16) |
@@ -1319,14 +1410,14 @@
: X86_32TargetCodeGenInfo(CGT, d, p, w, RegParms) {}
void getDependentLibraryOption(llvm::StringRef Lib,
- llvm::SmallString<24> &Opt) const {
+ llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:";
Opt += qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name,
llvm::StringRef Value,
- llvm::SmallString<32> &Opt) const {
+ llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
@@ -1336,12 +1427,12 @@
WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 7;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const {
+ llvm::Value *Address) const override {
llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
// 0-15 are the 16 integer registers.
@@ -1351,14 +1442,14 @@
}
void getDependentLibraryOption(llvm::StringRef Lib,
- llvm::SmallString<24> &Opt) const {
+ llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:";
Opt += qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name,
llvm::StringRef Value,
- llvm::SmallString<32> &Opt) const {
+ llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
@@ -1642,12 +1733,11 @@
// If this is a C++ record, classify the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
- for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(),
- e = CXXRD->bases_end(); i != e; ++i) {
- assert(!i->isVirtual() && !i->getType()->isDependentType() &&
+ for (const auto &I : CXXRD->bases()) {
+ assert(!I.isVirtual() && !I.getType()->isDependentType() &&
"Unexpected base class!");
const CXXRecordDecl *Base =
- cast<CXXRecordDecl>(i->getType()->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
// Classify this field.
//
@@ -1657,7 +1747,7 @@
Class FieldLo, FieldHi;
uint64_t Offset =
OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
- classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
+ classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg);
Lo = merge(Lo, FieldLo);
Hi = merge(Hi, FieldHi);
if (Lo == Memory || Hi == Memory)
@@ -1887,19 +1977,18 @@
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
- for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(),
- e = CXXRD->bases_end(); i != e; ++i) {
- assert(!i->isVirtual() && !i->getType()->isDependentType() &&
+ for (const auto &I : CXXRD->bases()) {
+ assert(!I.isVirtual() && !I.getType()->isDependentType() &&
"Unexpected base class!");
const CXXRecordDecl *Base =
- cast<CXXRecordDecl>(i->getType()->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
// If the base is after the span we care about, ignore it.
unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base));
if (BaseOffset >= EndBit) continue;
unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0;
- if (!BitsContainNoUserData(i->getType(), BaseStart,
+ if (!BitsContainNoUserData(I.getType(), BaseStart,
EndBit-BaseOffset, Context))
return false;
}
@@ -2643,7 +2732,7 @@
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
// FIXME: mingw-w64-gcc emits 128-bit struct as i128
- if (Size == 128 && getTarget().getTriple().getOS() == llvm::Triple::MinGW32)
+ if (Size == 128 && getTarget().getTriple().isWindowsGNUEnvironment())
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
Size));
@@ -2668,9 +2757,8 @@
QualType RetTy = FI.getReturnType();
FI.getReturnInfo() = classify(RetTy, true);
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it)
- it->info = classify(it->type, false);
+ for (auto &I : FI.arguments())
+ I.info = classify(I.type, false);
}
llvm::Value *WinX86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -2701,9 +2789,9 @@
public:
NaClX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX)
: ABIInfo(CGT), PInfo(CGT), NInfo(CGT, HasAVX) {}
- virtual void computeInfo(CGFunctionInfo &FI) const;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
private:
PNaClABIInfo PInfo; // Used for generating calls with pnaclcall callingconv.
X86_64ABIInfo NInfo; // Used for everything else.
@@ -2739,13 +2827,13 @@
public:
PPC32TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {}
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const;
+ llvm::Value *Address) const override;
};
}
@@ -2811,29 +2899,27 @@
// floating-point value) to avoid pushing them to memory on function
// entry. This would require changing the logic in PPCISelLowering
// when lowering the parameters in the caller and args in the callee.
- virtual void computeInfo(CGFunctionInfo &FI) const {
+ void computeInfo(CGFunctionInfo &FI) const override {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it) {
+ for (auto &I : FI.arguments()) {
// We rely on the default argument classification for the most part.
// One exception: An aggregate containing a single floating-point
// or vector item must be passed in a register if one is available.
- const Type *T = isSingleElementStruct(it->type, getContext());
+ const Type *T = isSingleElementStruct(I.type, getContext());
if (T) {
const BuiltinType *BT = T->getAs<BuiltinType>();
if (T->isVectorType() || (BT && BT->isFloatingPoint())) {
QualType QT(T, 0);
- it->info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT));
+ I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT));
continue;
}
}
- it->info = classifyArgumentType(it->type);
+ I.info = classifyArgumentType(I.type);
}
}
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr,
- QualType Ty,
- CodeGenFunction &CGF) const;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -2841,26 +2927,26 @@
PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT)) {}
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const;
+ llvm::Value *Address) const override;
};
class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo {
public:
PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {}
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const;
+ llvm::Value *Address) const override;
};
}
@@ -3049,6 +3135,569 @@
}
//===----------------------------------------------------------------------===//
+// ARM64 ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class ARM64ABIInfo : public ABIInfo {
+public:
+ enum ABIKind {
+ AAPCS = 0,
+ DarwinPCS
+ };
+
+private:
+ ABIKind Kind;
+
+public:
+ ARM64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {}
+
+private:
+ ABIKind getABIKind() const { return Kind; }
+ bool isDarwinPCS() const { return Kind == DarwinPCS; }
+
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+ ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
+ bool &IsHA, unsigned &AllocatedGPR,
+ bool &IsSmallAggr) const;
+ bool isIllegalVectorType(QualType Ty) const;
+
+ virtual void computeInfo(CGFunctionInfo &FI) const {
+ // To correctly handle Homogeneous Aggregates, we need to keep track of the
+ // number of SIMD and Floating-point registers allocated so far.
+ // If the argument is an HFA or an HVA and there are sufficient unallocated
+ // SIMD and Floating-point registers, then the argument is allocated to SIMD
+ // and Floating-point Registers (with one register per member of the HFA or
+ // HVA). Otherwise, the NSRN is set to 8.
+ unsigned AllocatedVFP = 0;
+ // To correctly handle small aggregates, we need to keep track of the number
+ // of GPRs allocated so far. If the small aggregate can't all fit into
+ // registers, it will be passed on the stack. We don't allow the aggregate
+ // to be partially in registers.
+ unsigned AllocatedGPR = 0;
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
+ it != ie; ++it) {
+ unsigned PreAllocation = AllocatedVFP, PreGPR = AllocatedGPR;
+ bool IsHA = false, IsSmallAggr = false;
+ const unsigned NumVFPs = 8;
+ const unsigned NumGPRs = 8;
+ it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA,
+ AllocatedGPR, IsSmallAggr);
+ // If we do not have enough VFP registers for the HA, any VFP registers
+ // that are unallocated are marked as unavailable. To achieve this, we add
+ // padding of (NumVFPs - PreAllocation) floats.
+ if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
+ llvm::Type *PaddingTy = llvm::ArrayType::get(
+ llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
+ if (isDarwinPCS())
+ it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
+ else {
+ // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs
+ // as sequences of floats since they'll get "holes" inserted as
+ // padding by the back end.
+ uint32_t NumStackSlots = getContext().getTypeSize(it->type);
+ NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
+
+ llvm::Type *CoerceTy = llvm::ArrayType::get(
+ llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
+ it->info = ABIArgInfo::getDirect(CoerceTy, 0, PaddingTy);
+ }
+ }
+ // If we do not have enough GPRs for the small aggregate, any GPR regs
+ // that are unallocated are marked as unavailable.
+ if (IsSmallAggr && AllocatedGPR > NumGPRs && PreGPR < NumGPRs) {
+ llvm::Type *PaddingTy = llvm::ArrayType::get(
+ llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreGPR);
+ it->info =
+ ABIArgInfo::getDirect(it->info.getCoerceToType(), 0, PaddingTy);
+ }
+ }
+ }
+
+ llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const;
+
+ llvm::Value *EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const;
+
+ virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const {
+ return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
+ : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+ }
+};
+
+class ARM64TargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ ARM64TargetCodeGenInfo(CodeGenTypes &CGT, ARM64ABIInfo::ABIKind Kind)
+ : TargetCodeGenInfo(new ARM64ABIInfo(CGT, Kind)) {}
+
+ StringRef getARCRetainAutoreleasedReturnValueMarker() const {
+ return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue";
+ }
+
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { return 31; }
+
+ virtual bool doesReturnSlotInterfereWithArgs() const { return false; }
+};
+}
+
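
The computeInfo above keeps HFA handling consistent by padding out whatever VFP registers remain once an HFA no longer fits, so later arguments cannot land in them. Below is a minimal arithmetic sketch of that accounting, with the counters as plain integers and the starting values chosen purely for illustration.

#include <cstdio>

int main() {
  const unsigned NumVFPs = 8;     // v0-v7 are available for HFA members
  unsigned AllocatedVFP = 6;      // six VFP registers already used (example)
  const unsigned Members = 4;     // the next argument is a 4-member HFA

  unsigned PreAllocation = AllocatedVFP;
  AllocatedVFP += Members;        // classifyArgumentType bumps the counter

  if (AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
    // The HFA spills to the stack, and the remaining registers are blocked
    // by (NumVFPs - PreAllocation) floats of padding.
    unsigned PaddingFloats = NumVFPs - PreAllocation;
    std::printf("HFA on the stack, %u float(s) of padding\n", PaddingFloats);
  } else {
    // Either the HFA still fits in registers, or the registers were already
    // exhausted before this argument; no padding is inserted in either case.
    std::printf("no padding inserted\n");
  }
  return 0;
}
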
+static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
+ ASTContext &Context,
+ uint64_t *HAMembers = 0);
+
+ABIArgInfo ARM64ABIInfo::classifyArgumentType(QualType Ty,
+ unsigned &AllocatedVFP,
+ bool &IsHA,
+ unsigned &AllocatedGPR,
+ bool &IsSmallAggr) const {
+ // Handle illegal vector types here.
+ if (isIllegalVectorType(Ty)) {
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 32) {
+ llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
+ AllocatedGPR++;
+ return ABIArgInfo::getDirect(ResType);
+ }
+ if (Size == 64) {
+ llvm::Type *ResType =
+ llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
+ AllocatedVFP++;
+ return ABIArgInfo::getDirect(ResType);
+ }
+ if (Size == 128) {
+ llvm::Type *ResType =
+ llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
+ AllocatedVFP++;
+ return ABIArgInfo::getDirect(ResType);
+ }
+ AllocatedGPR++;
+ return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+ }
+ if (Ty->isVectorType())
+ // Size of a legal vector should be either 64 or 128.
+ AllocatedVFP++;
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+ if (BT->getKind() == BuiltinType::Half ||
+ BT->getKind() == BuiltinType::Float ||
+ BT->getKind() == BuiltinType::Double ||
+ BT->getKind() == BuiltinType::LongDouble)
+ AllocatedVFP++;
+ }
+
+ if (!isAggregateTypeForABI(Ty)) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ if (!Ty->isFloatingType() && !Ty->isVectorType()) {
+ int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
+ AllocatedGPR += RegsNeeded;
+ }
+ return (Ty->isPromotableIntegerType() && isDarwinPCS()
+ ? ABIArgInfo::getExtend()
+ : ABIArgInfo::getDirect());
+ }
+
+ // Structures with either a non-trivial destructor or a non-trivial
+ // copy constructor are always indirect.
+ if (isRecordReturnIndirect(Ty, getCXXABI())) {
+ AllocatedGPR++;
+ return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+ }
+
+ // Empty records are always ignored on Darwin, but actually passed in C++ mode
+ // elsewhere for GNU compatibility.
+ if (isEmptyRecord(getContext(), Ty, true)) {
+ if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
+ return ABIArgInfo::getIgnore();
+
+ ++AllocatedGPR;
+ return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
+ }
+
+ // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
+ const Type *Base = 0;
+ uint64_t Members = 0;
+ if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
+ AllocatedVFP += Members;
+ IsHA = true;
+ return ABIArgInfo::getExpand();
+ }
+
+ // Aggregates <= 16 bytes are passed directly in registers or on the stack.
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 128) {
+ Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
+ AllocatedGPR += Size / 64;
+ IsSmallAggr = true;
+ // We use a pair of i64 for a 16-byte aggregate with 8-byte alignment.
+ // For aggregates with 16-byte alignment, we use i128.
+ if (getContext().getTypeAlign(Ty) < 128 && Size == 128) {
+ llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
+ }
+ return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
+ }
+
+ AllocatedGPR++;
+ return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+}
+
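
For aggregates of 128 bits or fewer, the classification above first rounds the size up to whole 64-bit registers and charges that many GPRs. The arithmetic is small enough to show on its own; the sizes below are illustrative inputs, not taken from the patch.

#include <cstdio>

// Rounds an aggregate's bit-size up to a whole number of 64-bit GPRs, as the
// code above does for aggregates of 128 bits or fewer.
static unsigned roundToGPRBits(unsigned SizeInBits) {
  return 64 * ((SizeInBits + 63) / 64);
}

int main() {
  const unsigned Sizes[] = {8, 40, 64, 96, 128};
  for (unsigned Size : Sizes)
    std::printf("%3u bits -> %3u bits (%u GPR(s))\n", Size,
                roundToGPRBits(Size), roundToGPRBits(Size) / 64);
  return 0;
}
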
+ABIArgInfo ARM64ABIInfo::classifyReturnType(QualType RetTy) const {
+ if (RetTy->isVoidType())
+ return ABIArgInfo::getIgnore();
+
+ // Large vector types should be returned via memory.
+ if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
+ return ABIArgInfo::getIndirect(0);
+
+ if (!isAggregateTypeForABI(RetTy)) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
+ RetTy = EnumTy->getDecl()->getIntegerType();
+
+ return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
+ : ABIArgInfo::getDirect());
+ }
+
+ // Structures with either a non-trivial destructor or a non-trivial
+ // copy constructor are always indirect.
+ if (isRecordReturnIndirect(RetTy, getCXXABI()))
+ return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+
+ if (isEmptyRecord(getContext(), RetTy, true))
+ return ABIArgInfo::getIgnore();
+
+ const Type *Base = 0;
+ if (isHomogeneousAggregate(RetTy, Base, getContext()))
+ // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
+ return ABIArgInfo::getDirect();
+
+ // Aggregates <= 16 bytes are returned directly in registers or on the stack.
+ uint64_t Size = getContext().getTypeSize(RetTy);
+ if (Size <= 128) {
+ Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
+ return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
+ }
+
+ return ABIArgInfo::getIndirect(0);
+}
+
+/// isIllegalVectorType - check whether the vector type is legal for ARM64.
+bool ARM64ABIInfo::isIllegalVectorType(QualType Ty) const {
+ if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ // Check whether VT is legal.
+ unsigned NumElements = VT->getNumElements();
+ uint64_t Size = getContext().getTypeSize(VT);
+ // NumElements should be a power of 2 between 1 and 16.
+ if ((NumElements & (NumElements - 1)) != 0 || NumElements > 16)
+ return true;
+ return Size != 64 && (Size != 128 || NumElements == 1);
+ }
+ return false;
+}
+
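
isIllegalVectorType above states the rule as a rejection test. Read positively, a vector type is legal when its element count is a power of two no larger than 16 and the whole vector is 64 bits, or 128 bits with more than one element. A standalone restatement follows; isLegalARM64Vector is a hypothetical helper, with an explicit guard for a zero element count that the original never sees.

#include <cstdio>
#include <cstdint>

// Positive form of the check in isIllegalVectorType() above.
static bool isLegalARM64Vector(unsigned NumElements, uint64_t SizeInBits) {
  // The element count must be a power of 2 between 1 and 16.
  if (NumElements == 0 || (NumElements & (NumElements - 1)) != 0 ||
      NumElements > 16)
    return false;
  // The whole vector must be 64 bits, or 128 bits with more than one element.
  return SizeInBits == 64 || (SizeInBits == 128 && NumElements != 1);
}

int main() {
  std::printf("4 x 32-bit (128 bits): %d\n", isLegalARM64Vector(4, 128)); // 1
  std::printf("3 x 32-bit (96 bits):  %d\n", isLegalARM64Vector(3, 96));  // 0
  return 0;
}
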
+static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty,
+ int AllocatedGPR, int AllocatedVFP,
+ bool IsIndirect, CodeGenFunction &CGF) {
+ // The AArch64 va_list type and handling is specified in the Procedure Call
+ // Standard, section B.4:
+ //
+ // struct {
+ // void *__stack;
+ // void *__gr_top;
+ // void *__vr_top;
+ // int __gr_offs;
+ // int __vr_offs;
+ // };
+
+ llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
+ llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
+ llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
+ llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
+ auto &Ctx = CGF.getContext();
+
+ llvm::Value *reg_offs_p = 0, *reg_offs = 0;
+ int reg_top_index;
+ int RegSize;
+ if (AllocatedGPR) {
+ assert(!AllocatedVFP && "Arguments never split between int & VFP regs");
+ // 3 is the field number of __gr_offs
+ reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
+ reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
+ reg_top_index = 1; // field number for __gr_top
+ RegSize = 8 * AllocatedGPR;
+ } else {
+ assert(!AllocatedGPR && "Argument must go in VFP or int regs");
+ // 4 is the field number of __vr_offs.
+ reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
+ reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
+ reg_top_index = 2; // field number for __vr_top
+ RegSize = 16 * AllocatedVFP;
+ }
+
+ //=======================================
+ // Find out where argument was passed
+ //=======================================
+
+ // If reg_offs >= 0 we're already using the stack for this type of
+ // argument. We don't want to keep updating reg_offs (in case it overflows,
+ // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
+ // whatever they get).
+ llvm::Value *UsingStack = 0;
+ UsingStack = CGF.Builder.CreateICmpSGE(
+ reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));
+
+ CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
+
+ // Otherwise, at least some kind of argument could go in these registers; the
+ // question is whether this particular type is too big.
+ CGF.EmitBlock(MaybeRegBlock);
+
+ // Integer arguments may need their register alignment corrected (for example
+ // a "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
+ // align __gr_offs to calculate the potential address.
+ if (AllocatedGPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
+ int Align = Ctx.getTypeAlign(Ty) / 8;
+
+ reg_offs = CGF.Builder.CreateAdd(
+ reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
+ "align_regoffs");
+ reg_offs = CGF.Builder.CreateAnd(
+ reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align),
+ "aligned_regoffs");
+ }
+
+ // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
+ llvm::Value *NewOffset = 0;
+ NewOffset = CGF.Builder.CreateAdd(
+ reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
+ CGF.Builder.CreateStore(NewOffset, reg_offs_p);
+
+ // Now we're in a position to decide whether this argument really was in
+ // registers or not.
+ llvm::Value *InRegs = 0;
+ InRegs = CGF.Builder.CreateICmpSLE(
+ NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");
+
+ CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
+
+ //=======================================
+ // Argument was in registers
+ //=======================================
+
+ // Now we emit the code for if the argument was originally passed in
+ // registers. First start the appropriate block:
+ CGF.EmitBlock(InRegBlock);
+
+ llvm::Value *reg_top_p = 0, *reg_top = 0;
+ reg_top_p =
+ CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
+ reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
+ llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs);
+ llvm::Value *RegAddr = 0;
+ llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
+
+ if (IsIndirect) {
+ // If it's been passed indirectly (actually a struct), whatever we find from
+ // stored registers or on the stack will actually be a struct **.
+ MemTy = llvm::PointerType::getUnqual(MemTy);
+ }
+
+ const Type *Base = 0;
+ uint64_t NumMembers;
+ if (isHomogeneousAggregate(Ty, Base, Ctx, &NumMembers) && NumMembers > 1) {
+ // Homogeneous aggregates passed in registers will have their elements split
+ // and stored 16 bytes apart regardless of size (they're notionally in qN,
+ // qN+1, ...). We reload and store into a temporary local variable
+ // contiguously.
+ assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
+ llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
+ llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
+ llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy);
+ int Offset = 0;
+
+ if (CGF.CGM.getDataLayout().isBigEndian() && Ctx.getTypeSize(Base) < 128)
+ Offset = 16 - Ctx.getTypeSize(Base) / 8;
+ for (unsigned i = 0; i < NumMembers; ++i) {
+ llvm::Value *BaseOffset =
+ llvm::ConstantInt::get(CGF.Int32Ty, 16 * i + Offset);
+ llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset);
+ LoadAddr = CGF.Builder.CreateBitCast(
+ LoadAddr, llvm::PointerType::getUnqual(BaseTy));
+ llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i);
+
+ llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
+ CGF.Builder.CreateStore(Elem, StoreAddr);
+ }
+
+ RegAddr = CGF.Builder.CreateBitCast(Tmp, MemTy);
+ } else {
+ // Otherwise the object is contiguous in memory
+ unsigned BeAlign = reg_top_index == 2 ? 16 : 8;
+ if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
+ Ctx.getTypeSize(Ty) < (BeAlign * 8)) {
+ int Offset = BeAlign - Ctx.getTypeSize(Ty) / 8;
+ BaseAddr = CGF.Builder.CreatePtrToInt(BaseAddr, CGF.Int64Ty);
+
+ BaseAddr = CGF.Builder.CreateAdd(
+ BaseAddr, llvm::ConstantInt::get(CGF.Int64Ty, Offset), "align_be");
+
+ BaseAddr = CGF.Builder.CreateIntToPtr(BaseAddr, CGF.Int8PtrTy);
+ }
+
+ RegAddr = CGF.Builder.CreateBitCast(BaseAddr, MemTy);
+ }
+
+ CGF.EmitBranch(ContBlock);
+
+ //=======================================
+ // Argument was on the stack
+ //=======================================
+ CGF.EmitBlock(OnStackBlock);
+
+ llvm::Value *stack_p = 0, *OnStackAddr = 0;
+ stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
+ OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack");
+
+ // Again, stack arguments may need realignment. In this case both integer and
+ // floating-point ones might be affected.
+ if (!IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
+ int Align = Ctx.getTypeAlign(Ty) / 8;
+
+ OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty);
+
+ OnStackAddr = CGF.Builder.CreateAdd(
+ OnStackAddr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1),
+ "align_stack");
+ OnStackAddr = CGF.Builder.CreateAnd(
+ OnStackAddr, llvm::ConstantInt::get(CGF.Int64Ty, -Align),
+ "align_stack");
+
+ OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy);
+ }
+
+ uint64_t StackSize;
+ if (IsIndirect)
+ StackSize = 8;
+ else
+ StackSize = Ctx.getTypeSize(Ty) / 8;
+
+ // All stack slots are 8 bytes
+ StackSize = llvm::RoundUpToAlignment(StackSize, 8);
+
+ llvm::Value *StackSizeC = llvm::ConstantInt::get(CGF.Int32Ty, StackSize);
+ llvm::Value *NewStack =
+ CGF.Builder.CreateGEP(OnStackAddr, StackSizeC, "new_stack");
+
+ // Write the new value of __stack for the next call to va_arg
+ CGF.Builder.CreateStore(NewStack, stack_p);
+
+ if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
+ Ctx.getTypeSize(Ty) < 64) {
+ int Offset = 8 - Ctx.getTypeSize(Ty) / 8;
+ OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty);
+
+ OnStackAddr = CGF.Builder.CreateAdd(
+ OnStackAddr, llvm::ConstantInt::get(CGF.Int64Ty, Offset), "align_be");
+
+ OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy);
+ }
+
+ OnStackAddr = CGF.Builder.CreateBitCast(OnStackAddr, MemTy);
+
+ CGF.EmitBranch(ContBlock);
+
+ //=======================================
+ // Tidy up
+ //=======================================
+ CGF.EmitBlock(ContBlock);
+
+ llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(MemTy, 2, "vaarg.addr");
+ ResAddr->addIncoming(RegAddr, InRegBlock);
+ ResAddr->addIncoming(OnStackAddr, OnStackBlock);
+
+ if (IsIndirect)
+ return CGF.Builder.CreateLoad(ResAddr, "vaarg.addr");
+
+ return ResAddr;
+}
+
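
To make the basic-block structure above easier to follow, here is a rough C++ rendition of what the emitted IR computes at run time. It is only a reading aid under stated simplifications (invented names, the little-endian directly-passed GPR case only, no indirect >16-byte case and no HFA re-packing), not code from this patch:

    #include <cstddef>
    #include <cstdint>

    // Field names come from the AAPCS64 va_list; everything else is invented.
    struct aapcs64_va_list {
      void *__stack;   // next stacked argument
      void *__gr_top;  // one past the end of the saved GP register area
      void *__vr_top;  // one past the end of the saved FP/SIMD register area
      int __gr_offs;   // negative offset from __gr_top; >= 0 once exhausted
      int __vr_offs;   // negative offset from __vr_top; >= 0 once exhausted
    };

    static void *next_gp_arg(aapcs64_va_list *ap, int NumGPRs, int Align,
                             size_t Size) {
      if (ap->__gr_offs < 0) {                // registers may still be available
        int offs = ap->__gr_offs;
        if (Align > 8)                        // e.g. a 16-byte aligned __int128
          offs = (offs + Align - 1) & -Align; // "align_regoffs"/"aligned_regoffs"
        ap->__gr_offs = offs + 8 * NumGPRs;   // "new_reg_offs", always stored
        if (ap->__gr_offs <= 0)               // the argument really fit
          return static_cast<char *>(ap->__gr_top) + offs;
      }
      // Stack path: realign over-aligned types, then bump __stack by the size
      // rounded up to a whole 8-byte slot.
      char *p = static_cast<char *>(ap->__stack);
      if (Align > 8)
        p = reinterpret_cast<char *>(
            (reinterpret_cast<uintptr_t>(p) + Align - 1) & -uintptr_t(Align));
      ap->__stack = p + ((Size + 7) & ~size_t(7));
      return p;
    }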
+llvm::Value *ARM64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const {
+
+ unsigned AllocatedGPR = 0, AllocatedVFP = 0;
+ bool IsHA = false, IsSmallAggr = false;
+ ABIArgInfo AI =
+ classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR, IsSmallAggr);
+
+ return EmitAArch64VAArg(VAListAddr, Ty, AllocatedGPR, AllocatedVFP,
+ AI.isIndirect(), CGF);
+}
+
+llvm::Value *ARM64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const {
+ // We only lower va_arg here for aggregates and illegal vector types; for
+ // all other types we return 0 so that the ordinary LLVM va_arg instruction
+ // is used instead.
+ if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
+ return 0;
+
+ uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8;
+ uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
+
+ const Type *Base = 0;
+ bool isHA = isHomogeneousAggregate(Ty, Base, getContext());
+
+ bool isIndirect = false;
+ // Arguments bigger than 16 bytes which aren't homogeneous aggregates should
+ // be passed indirectly.
+ if (Size > 16 && !isHA) {
+ isIndirect = true;
+ Size = 8;
+ Align = 8;
+ }
+
+ llvm::Type *BP = llvm::Type::getInt8PtrTy(CGF.getLLVMContext());
+ llvm::Type *BPP = llvm::PointerType::getUnqual(BP);
+
+ CGBuilderTy &Builder = CGF.Builder;
+ llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
+ llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
+
+ if (isEmptyRecord(getContext(), Ty, true)) {
+ // These are ignored for parameter passing purposes.
+ llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
+ return Builder.CreateBitCast(Addr, PTy);
+ }
+
+ const uint64_t MinABIAlign = 8;
+ if (Align > MinABIAlign) {
+ llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, Align - 1);
+ Addr = Builder.CreateGEP(Addr, Offset);
+ llvm::Value *AsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
+ llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int64Ty, ~(Align - 1));
+ llvm::Value *Aligned = Builder.CreateAnd(AsInt, Mask);
+ Addr = Builder.CreateIntToPtr(Aligned, BP, "ap.align");
+ }
+
+ uint64_t Offset = llvm::RoundUpToAlignment(Size, MinABIAlign);
+ llvm::Value *NextAddr = Builder.CreateGEP(
+ Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next");
+ Builder.CreateStore(NextAddr, VAListAddrAsBPP);
+
+ if (isIndirect)
+ Addr = Builder.CreateLoad(Builder.CreateBitCast(Addr, BPP));
+ llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
+ llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
+
+ return AddrTyped;
+}
+
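
The Darwin lowering above is a simple pointer bump. A reading-aid sketch under the same caveats (invented names, empty records and the exact IR casts omitted):

    #include <cstddef>
    #include <cstdint>

    static void *darwin_arm64_va_arg(char **ap, size_t Size, size_t Align,
                                     bool IsHA) {
      bool Indirect = Size > 16 && !IsHA;   // big non-HFA aggregates: pointer only
      if (Indirect) { Size = 8; Align = 8; }

      char *p = *ap;
      if (Align > 8)                        // over-aligned types round the pointer up
        p = reinterpret_cast<char *>(
            (reinterpret_cast<uintptr_t>(p) + Align - 1) & ~uintptr_t(Align - 1));

      *ap = p + ((Size + 7) & ~size_t(7));  // every slot is a multiple of 8 bytes
      return Indirect ? *reinterpret_cast<void **>(p) : static_cast<void *>(p);
    }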
+//===----------------------------------------------------------------------===//
// ARM ABI Implementation
//===----------------------------------------------------------------------===//
@@ -3064,35 +3713,62 @@
private:
ABIKind Kind;
+ mutable int VFPRegs[16];
+ const unsigned NumVFPs;
+ const unsigned NumGPRs;
+ mutable unsigned AllocatedGPRs;
+ mutable unsigned AllocatedVFPs;
public:
- ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) {
+ ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind),
+ NumVFPs(16), NumGPRs(4) {
setRuntimeCC();
+ resetAllocatedRegs();
}
bool isEABI() const {
- StringRef Env = getTarget().getTriple().getEnvironmentName();
- return (Env == "gnueabi" || Env == "eabi" ||
- Env == "android" || Env == "androideabi");
+ switch (getTarget().getTriple().getEnvironment()) {
+ case llvm::Triple::Android:
+ case llvm::Triple::EABI:
+ case llvm::Triple::EABIHF:
+ case llvm::Triple::GNUEABI:
+ case llvm::Triple::GNUEABIHF:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool isEABIHF() const {
+ switch (getTarget().getTriple().getEnvironment()) {
+ case llvm::Triple::EABIHF:
+ case llvm::Triple::GNUEABIHF:
+ return true;
+ default:
+ return false;
+ }
}
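
Where the old isEABI() string-matched the environment name, the new predicates switch over the parsed triple environment. A small standalone illustration of the mapping (the helper name is invented, not part of the patch):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Triple.h"

    static bool isHardFloatEnv(llvm::StringRef TT) {
      switch (llvm::Triple(TT).getEnvironment()) {
      case llvm::Triple::EABIHF:
      case llvm::Triple::GNUEABIHF:
        return true;   // e.g. "armv7-unknown-linux-gnueabihf"
      default:
        return false;  // e.g. "armv7-unknown-linux-gnueabi" (soft-float ABI)
      }
    }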
ABIKind getABIKind() const { return Kind; }
private:
- ABIArgInfo classifyReturnType(QualType RetTy) const;
- ABIArgInfo classifyArgumentType(QualType RetTy, int *VFPRegs,
- unsigned &AllocatedVFP,
- bool &IsHA) const;
+ ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
+ ABIArgInfo classifyArgumentType(QualType RetTy, bool &IsHA, bool isVariadic,
+ bool &IsCPRC) const;
bool isIllegalVectorType(QualType Ty) const;
- virtual void computeInfo(CGFunctionInfo &FI) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
llvm::CallingConv::ID getLLVMDefaultCC() const;
llvm::CallingConv::ID getABIDefaultCC() const;
void setRuntimeCC();
+
+ void markAllocatedGPRs(unsigned Alignment, unsigned NumRequired) const;
+ void markAllocatedVFPs(unsigned Alignment, unsigned NumRequired) const;
+ void resetAllocatedRegs(void) const;
};
class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -3104,16 +3780,16 @@
return static_cast<const ARMABIInfo&>(TargetCodeGenInfo::getABIInfo());
}
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 13;
}
- StringRef getARCRetainAutoreleasedReturnValueMarker() const {
+ StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue";
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const {
+ llvm::Value *Address) const override {
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
// 0-15 are the 16 integer registers.
@@ -3121,13 +3797,13 @@
return false;
}
- unsigned getSizeOfUnwindException() const {
+ unsigned getSizeOfUnwindException() const override {
if (getABIInfo().isEABI()) return 88;
return TargetCodeGenInfo::getSizeOfUnwindException();
}
void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
+ CodeGen::CodeGenModule &CGM) const override {
const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
if (!FD)
return;
@@ -3176,24 +3852,40 @@
// allocated to the lowest-numbered sequence of such registers.
// C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
// unallocated are marked as unavailable.
- unsigned AllocatedVFP = 0;
- int VFPRegs[16] = { 0 };
- FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it) {
- unsigned PreAllocation = AllocatedVFP;
+ resetAllocatedRegs();
+
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic());
+ for (auto &I : FI.arguments()) {
+ unsigned PreAllocationVFPs = AllocatedVFPs;
+ unsigned PreAllocationGPRs = AllocatedGPRs;
bool IsHA = false;
+ bool IsCPRC = false;
// 6.1.2.3 There is one VFP co-processor register class using registers
// s0-s15 (d0-d7) for passing arguments.
- const unsigned NumVFPs = 16;
- it->info = classifyArgumentType(it->type, VFPRegs, AllocatedVFP, IsHA);
+ I.info = classifyArgumentType(I.type, IsHA, FI.isVariadic(), IsCPRC);
+ assert((IsCPRC || !IsHA) && "Homogeneous aggregates must be CPRCs");
// If we do not have enough VFP registers for the HA, any VFP registers
// that are unallocated are marked as unavailable. To achieve this, we add
- // padding of (NumVFPs - PreAllocation) floats.
- if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
+ // padding of (NumVFPs - PreAllocationVFP) floats.
+ // Note that IsHA is only set when the AAPCS-VFP calling convention is in
+ // use and the callee is not variadic.
+ if (IsHA && AllocatedVFPs > NumVFPs && PreAllocationVFPs < NumVFPs) {
llvm::Type *PaddingTy = llvm::ArrayType::get(
- llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
- it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
+ llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocationVFPs);
+ I.info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
+ }
+
+ // If we have allocated some arguments onto the stack (due to running
+ // out of VFP registers), we cannot split an argument between GPRs and
+ // the stack. If this situation occurs, we add padding to prevent the
+ // GPRs from being used. In this situation, the current argument could
+ // only be allocated by rule C.8, so rule C.6 would mark these GPRs as
+ // unusable anyway.
+ const bool StackUsed = PreAllocationGPRs > NumGPRs || PreAllocationVFPs > NumVFPs;
+ if (!IsCPRC && PreAllocationGPRs < NumGPRs && AllocatedGPRs > NumGPRs && StackUsed) {
+ llvm::Type *PaddingTy = llvm::ArrayType::get(
+ llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreAllocationGPRs);
+ I.info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
}
}
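
The new GPR-padding rule is easiest to see on a concrete signature. The declarations below are a hypothetical example of the behaviour the code above implements, assuming an AAPCS-VFP (hard-float) target and a non-variadic callee:

    struct Big3 { int a, b, c; };                       // needs three core registers
    void f(double d1, double d2, double d3, double d4,
           double d5, double d6, double d7, double d8,  // fill d0-d7 (all 16 S regs)
           double d9,                                    // VFP exhausted: on the stack
           int i1, int i2,                               // r0, r1
           Big3 s);                                      // r2/r3 are padded out and s
                                                         // is passed entirely on the
                                                         // stack, since it cannot be
                                                         // split once the stack is
                                                         // already in use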
@@ -3209,7 +3901,7 @@
/// Return the default calling convention that LLVM will use.
llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const {
// The default calling convention that LLVM will infer.
- if (getTarget().getTriple().getEnvironmentName()=="gnueabihf")
+ if (isEABIHF())
return llvm::CallingConv::ARM_AAPCS_VFP;
else if (isEABI())
return llvm::CallingConv::ARM_AAPCS;
@@ -3243,8 +3935,7 @@
/// contained in the type is returned through it; this is used for the
/// recursive calls that check aggregate component types.
static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
- ASTContext &Context,
- uint64_t *HAMembers = 0) {
+ ASTContext &Context, uint64_t *HAMembers) {
uint64_t Members = 0;
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
if (!isHomogeneousAggregate(AT->getElementType(), Base, Context, &Members))
@@ -3256,9 +3947,7 @@
return false;
Members = 0;
- for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
- i != e; ++i) {
- const FieldDecl *FD = *i;
+ for (const auto *FD : RD->fields()) {
uint64_t FldMembers;
if (!isHomogeneousAggregate(FD->getType(), Base, Context, &FldMembers))
return false;
@@ -3293,10 +3982,29 @@
const Type *TyPtr = Ty.getTypePtr();
if (!Base)
Base = TyPtr;
- if (Base != TyPtr &&
- (!Base->isVectorType() || !TyPtr->isVectorType() ||
- Context.getTypeSize(Base) != Context.getTypeSize(TyPtr)))
- return false;
+
+ if (Base != TyPtr) {
+ // Homogeneous aggregates are defined as containing members with the
+ // same machine type. There are two cases in which two members have
+ // different TypePtrs but the same machine type:
+
+ // 1) Vectors of the same length, regardless of the type and number
+ // of their members.
+ const bool SameLengthVectors = Base->isVectorType() && TyPtr->isVectorType()
+ && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr));
+
+ // 2) In the 32-bit AAPCS, `double' and `long double' have the same
+ // machine type. This is not the case for the 64-bit AAPCS.
+ const bool SameSizeDoubles =
+ ( ( Base->isSpecificBuiltinType(BuiltinType::Double)
+ && TyPtr->isSpecificBuiltinType(BuiltinType::LongDouble))
+ || ( Base->isSpecificBuiltinType(BuiltinType::LongDouble)
+ && TyPtr->isSpecificBuiltinType(BuiltinType::Double)))
+ && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr));
+
+ if (!SameLengthVectors && !SameSizeDoubles)
+ return false;
+ }
}
// Homogeneous Aggregates can have at most 4 members of the base type.
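
As a reading aid, here are a few hypothetical aggregates and how the updated check classifies them, assuming a 32-bit AAPCS target where long double has the same 64-bit machine type as double:

    typedef float v4f32 __attribute__((vector_size(16)));
    typedef int   v4i32 __attribute__((vector_size(16)));
    struct HA1 { v4f32 a; v4i32 b; };          // two 128-bit vectors: still an HA
    struct HA2 { double d; long double ld; };  // same 64-bit machine type: an HA
    struct NotHA { float f; double d; };       // base sizes differ: not an HA
    struct TooBig { double d[5]; };            // more than 4 members: not an HA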
@@ -3308,12 +4016,15 @@
/// markAllocatedVFPs - update VFPRegs according to the alignment and
/// number of VFP registers (unit is S register) requested.
-static void markAllocatedVFPs(int *VFPRegs, unsigned &AllocatedVFP,
- unsigned Alignment,
- unsigned NumRequired) {
+void ARMABIInfo::markAllocatedVFPs(unsigned Alignment,
+ unsigned NumRequired) const {
// Early Exit.
- if (AllocatedVFP >= 16)
+ if (AllocatedVFPs >= 16) {
+ // We use AllocatedVFPs > 16 to signal that some CPRCs were allocated on
+ // the stack.
+ AllocatedVFPs = 17;
return;
+ }
// C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
// VFP registers of the appropriate type unallocated then the argument is
// allocated to the lowest-numbered sequence of such registers.
@@ -3327,7 +4038,7 @@
if (FoundSlot) {
for (unsigned J = I, JEnd = I + NumRequired; J < JEnd; J++)
VFPRegs[J] = 1;
- AllocatedVFP += NumRequired;
+ AllocatedVFPs += NumRequired;
return;
}
}
@@ -3335,12 +4046,32 @@
// unallocated are marked as unavailable.
for (unsigned I = 0; I < 16; I++)
VFPRegs[I] = 1;
- AllocatedVFP = 17; // We do not have enough VFP registers.
+ AllocatedVFPs = 17; // We do not have enough VFP registers.
}
-ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, int *VFPRegs,
- unsigned &AllocatedVFP,
- bool &IsHA) const {
+/// Update AllocatedGPRs to record the number of general purpose registers
+/// which have been allocated. It is valid for AllocatedGPRs to go above 4;
+/// this represents arguments being stored on the stack.
+void ARMABIInfo::markAllocatedGPRs(unsigned Alignment,
+ unsigned NumRequired) const {
+ assert((Alignment == 1 || Alignment == 2) && "Alignment must be 4 or 8 bytes");
+
+ if (Alignment == 2 && AllocatedGPRs & 0x1)
+ AllocatedGPRs += 1;
+
+ AllocatedGPRs += NumRequired;
+}
+
+void ARMABIInfo::resetAllocatedRegs(void) const {
+ AllocatedGPRs = 0;
+ AllocatedVFPs = 0;
+ for (unsigned i = 0; i < NumVFPs; ++i)
+ VFPRegs[i] = 0;
+}
+
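
The Alignment parameter of markAllocatedGPRs is counted in 32-bit core registers, which is why the assert above talks about "4 or 8 bytes" while the values passed are 1 and 2. A tiny standalone model of the accounting (the helper name is invented):

    // 8-byte aligned arguments first round up to an even register number, as
    // the AAPCS requires for 64-bit values.
    static unsigned allocateGPRs(unsigned AllocatedGPRs, unsigned Alignment,
                                 unsigned NumRequired) {
      if (Alignment == 2 && (AllocatedGPRs & 0x1))
        AllocatedGPRs += 1;                 // skip the odd register (e.g. r1)
      return AllocatedGPRs + NumRequired;   // may exceed 4: the rest is stack
    }
    // e.g. int, long long, int:
    //   allocateGPRs(0, 1, 1) == 1   (r0)
    //   allocateGPRs(1, 2, 2) == 4   (r1 skipped, r2+r3)
    //   allocateGPRs(4, 1, 1) == 5   (no core register left: the stack)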
+ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool &IsHA,
+ bool isVariadic,
+ bool &IsCPRC) const {
// We update number of allocated VFPs according to
// 6.1.2.1 The following argument types are VFP CPRCs:
// A single-precision floating-point type (including promoted
@@ -3356,55 +4087,82 @@
if (Size <= 32) {
llvm::Type *ResType =
llvm::Type::getInt32Ty(getVMContext());
+ markAllocatedGPRs(1, 1);
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64) {
llvm::Type *ResType = llvm::VectorType::get(
llvm::Type::getInt32Ty(getVMContext()), 2);
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, 2);
+ if (getABIKind() == ARMABIInfo::AAPCS || isVariadic){
+ markAllocatedGPRs(2, 2);
+ } else {
+ markAllocatedVFPs(2, 2);
+ IsCPRC = true;
+ }
return ABIArgInfo::getDirect(ResType);
}
if (Size == 128) {
llvm::Type *ResType = llvm::VectorType::get(
llvm::Type::getInt32Ty(getVMContext()), 4);
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 4, 4);
+ if (getABIKind() == ARMABIInfo::AAPCS || isVariadic) {
+ markAllocatedGPRs(2, 4);
+ } else {
+ markAllocatedVFPs(4, 4);
+ IsCPRC = true;
+ }
return ABIArgInfo::getDirect(ResType);
}
+ markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
}
// Update VFPRegs for legal vector types.
- if (const VectorType *VT = Ty->getAs<VectorType>()) {
- uint64_t Size = getContext().getTypeSize(VT);
- // Size of a legal vector should be power of 2 and above 64.
- markAllocatedVFPs(VFPRegs, AllocatedVFP, Size >= 128 ? 4 : 2, Size / 32);
+ if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
+ if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ uint64_t Size = getContext().getTypeSize(VT);
+ // Size of a legal vector should be power of 2 and above 64.
+ markAllocatedVFPs(Size >= 128 ? 4 : 2, Size / 32);
+ IsCPRC = true;
+ }
}
// Update VFPRegs for floating point types.
- if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
- if (BT->getKind() == BuiltinType::Half ||
- BT->getKind() == BuiltinType::Float)
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 1, 1);
- if (BT->getKind() == BuiltinType::Double ||
- BT->getKind() == BuiltinType::LongDouble)
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, 2);
+ if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+ if (BT->getKind() == BuiltinType::Half ||
+ BT->getKind() == BuiltinType::Float) {
+ markAllocatedVFPs(1, 1);
+ IsCPRC = true;
+ }
+ if (BT->getKind() == BuiltinType::Double ||
+ BT->getKind() == BuiltinType::LongDouble) {
+ markAllocatedVFPs(2, 2);
+ IsCPRC = true;
+ }
+ }
}
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
- if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
Ty = EnumTy->getDecl()->getIntegerType();
+ }
+ unsigned Size = getContext().getTypeSize(Ty);
+ if (!IsCPRC)
+ markAllocatedGPRs(Size > 32 ? 2 : 1, (Size + 31) / 32);
return (Ty->isPromotableIntegerType() ?
ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
}
- if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
+ markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
+ }
// Ignore empty records.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
- if (getABIKind() == ARMABIInfo::AAPCS_VFP) {
+ if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
// Homogeneous Aggregates need to be expanded when we can fit the aggregate
// into VFP registers.
const Type *Base = 0;
@@ -3415,16 +4173,17 @@
if (Base->isVectorType()) {
// ElementSize is in number of floats.
unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
- markAllocatedVFPs(VFPRegs, AllocatedVFP, ElementSize,
+ markAllocatedVFPs(ElementSize,
Members * ElementSize);
} else if (Base->isSpecificBuiltinType(BuiltinType::Float))
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 1, Members);
+ markAllocatedVFPs(1, Members);
else {
assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
Base->isSpecificBuiltinType(BuiltinType::LongDouble));
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, Members * 2);
+ markAllocatedVFPs(2, Members * 2);
}
IsHA = true;
+ IsCPRC = true;
return ABIArgInfo::getExpand();
}
}
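
For reference, a few hypothetical homogeneous aggregates and the S-register slots the calls above reserve for them under AAPCS-VFP with a non-variadic callee:

    typedef float v2f __attribute__((vector_size(8)));
    struct F3 { float x, y, z; };    // markAllocatedVFPs(1, 3): s0-s2
    struct D2 { double re, im; };    // markAllocatedVFPs(2, 4): d0-d1, i.e. s0-s3
    struct V4 { v2f a, b, c, d; };   // markAllocatedVFPs(2, 8): d0-d3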
@@ -3439,7 +4198,9 @@
getABIKind() == ARMABIInfo::AAPCS)
ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
- return ABIArgInfo::getIndirect(0, /*ByVal=*/true,
+ // Update Allocated GPRs
+ markAllocatedGPRs(1, 1);
+ return ABIArgInfo::getIndirect(TyAlign, /*ByVal=*/true,
/*Realign=*/TyAlign > ABIAlign);
}
@@ -3451,9 +4212,11 @@
if (getContext().getTypeAlign(Ty) <= 32) {
ElemTy = llvm::Type::getInt32Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
+ markAllocatedGPRs(1, SizeRegs);
} else {
ElemTy = llvm::Type::getInt64Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64;
+ markAllocatedGPRs(2, SizeRegs * 2);
}
llvm::Type *STy =
@@ -3546,13 +4309,16 @@
return true;
}
-ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy) const {
+ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
+ bool isVariadic) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
// Large vector types should be returned via memory.
- if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
+ if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) {
+ markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0);
+ }
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
@@ -3565,8 +4331,10 @@
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always indirect.
- if (isRecordReturnIndirect(RetTy, getCXXABI()))
+ if (isRecordReturnIndirect(RetTy, getCXXABI())) {
+ markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+ }
// Are we following APCS?
if (getABIKind() == APCS) {
@@ -3593,6 +4361,7 @@
}
// Otherwise return in memory.
+ markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0);
}
@@ -3602,7 +4371,7 @@
return ABIArgInfo::getIgnore();
// Check for homogeneous aggregates with AAPCS-VFP.
- if (getABIKind() == AAPCS_VFP) {
+ if (getABIKind() == AAPCS_VFP && !isVariadic) {
const Type *Base = 0;
if (isHomogeneousAggregate(RetTy, Base, getContext())) {
assert(Base && "Base class should be set for homogeneous aggregate");
@@ -3623,6 +4392,7 @@
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
}
+ markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0);
}
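
A hypothetical pair of declarations showing the effect of the new isVariadic check on return values (AAPCS-VFP target):

    struct HFA4 { float x, y, z, w; };
    HFA4 f();          // an HA: returned in VFP registers (s0-s3)
    HFA4 g(int, ...);  // variadic: base-standard rules apply, so it is returned
                       // indirectly via a hidden sret pointer (hence the
                       // markAllocatedGPRs call above)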
@@ -3720,9 +4490,9 @@
public:
NaClARMABIInfo(CodeGen::CodeGenTypes &CGT, ARMABIInfo::ABIKind Kind)
: ABIInfo(CGT), PInfo(CGT), NInfo(CGT, Kind) {}
- virtual void computeInfo(CGFunctionInfo &FI) const;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
private:
PNaClABIInfo PInfo; // Used for generating calls with pnaclcall callingconv.
ARMABIInfo NInfo; // Used for everything else.
@@ -3770,10 +4540,10 @@
ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt,
llvm::Type *DirectTy = 0) const;
- virtual void computeInfo(CGFunctionInfo &FI) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -3785,12 +4555,12 @@
return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
}
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 31;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const {
+ llvm::Value *Address) const override {
// 0-31 are x0-x30 and sp: 8 bytes each
llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31);
@@ -3813,9 +4583,8 @@
FreeIntRegs, FreeVFPRegs);
FreeIntRegs = FreeVFPRegs = 8;
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it) {
- it->info = classifyGenericType(it->type, FreeIntRegs, FreeVFPRegs);
+ for (auto &I : FI.arguments()) {
+ I.info = classifyGenericType(I.type, FreeIntRegs, FreeVFPRegs);
}
}
@@ -3958,208 +4727,12 @@
llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
CodeGenFunction &CGF) const {
- // The AArch64 va_list type and handling is specified in the Procedure Call
- // Standard, section B.4:
- //
- // struct {
- // void *__stack;
- // void *__gr_top;
- // void *__vr_top;
- // int __gr_offs;
- // int __vr_offs;
- // };
-
- assert(!CGF.CGM.getDataLayout().isBigEndian()
- && "va_arg not implemented for big-endian AArch64");
-
int FreeIntRegs = 8, FreeVFPRegs = 8;
Ty = CGF.getContext().getCanonicalType(Ty);
ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs);
- llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
- llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
- llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
- llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
-
- llvm::Value *reg_offs_p = 0, *reg_offs = 0;
- int reg_top_index;
- int RegSize;
- if (FreeIntRegs < 8) {
- assert(FreeVFPRegs == 8 && "Arguments never split between int & VFP regs");
- // 3 is the field number of __gr_offs
- reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
- reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
- reg_top_index = 1; // field number for __gr_top
- RegSize = 8 * (8 - FreeIntRegs);
- } else {
- assert(FreeVFPRegs < 8 && "Argument must go in VFP or int regs");
- // 4 is the field number of __vr_offs.
- reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
- reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
- reg_top_index = 2; // field number for __vr_top
- RegSize = 16 * (8 - FreeVFPRegs);
- }
-
- //=======================================
- // Find out where argument was passed
- //=======================================
-
- // If reg_offs >= 0 we're already using the stack for this type of
- // argument. We don't want to keep updating reg_offs (in case it overflows,
- // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
- // whatever they get).
- llvm::Value *UsingStack = 0;
- UsingStack = CGF.Builder.CreateICmpSGE(reg_offs,
- llvm::ConstantInt::get(CGF.Int32Ty, 0));
-
- CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
-
- // Otherwise, at least some kind of argument could go in these registers, the
- // quesiton is whether this particular type is too big.
- CGF.EmitBlock(MaybeRegBlock);
-
- // Integer arguments may need to correct register alignment (for example a
- // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
- // align __gr_offs to calculate the potential address.
- if (FreeIntRegs < 8 && AI.isDirect() && getContext().getTypeAlign(Ty) > 64) {
- int Align = getContext().getTypeAlign(Ty) / 8;
-
- reg_offs = CGF.Builder.CreateAdd(reg_offs,
- llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
- "align_regoffs");
- reg_offs = CGF.Builder.CreateAnd(reg_offs,
- llvm::ConstantInt::get(CGF.Int32Ty, -Align),
- "aligned_regoffs");
- }
-
- // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
- llvm::Value *NewOffset = 0;
- NewOffset = CGF.Builder.CreateAdd(reg_offs,
- llvm::ConstantInt::get(CGF.Int32Ty, RegSize),
- "new_reg_offs");
- CGF.Builder.CreateStore(NewOffset, reg_offs_p);
-
- // Now we're in a position to decide whether this argument really was in
- // registers or not.
- llvm::Value *InRegs = 0;
- InRegs = CGF.Builder.CreateICmpSLE(NewOffset,
- llvm::ConstantInt::get(CGF.Int32Ty, 0),
- "inreg");
-
- CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
-
- //=======================================
- // Argument was in registers
- //=======================================
-
- // Now we emit the code for if the argument was originally passed in
- // registers. First start the appropriate block:
- CGF.EmitBlock(InRegBlock);
-
- llvm::Value *reg_top_p = 0, *reg_top = 0;
- reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
- reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
- llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs);
- llvm::Value *RegAddr = 0;
- llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
-
- if (!AI.isDirect()) {
- // If it's been passed indirectly (actually a struct), whatever we find from
- // stored registers or on the stack will actually be a struct **.
- MemTy = llvm::PointerType::getUnqual(MemTy);
- }
-
- const Type *Base = 0;
- uint64_t NumMembers;
- if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)
- && NumMembers > 1) {
- // Homogeneous aggregates passed in registers will have their elements split
- // and stored 16-bytes apart regardless of size (they're notionally in qN,
- // qN+1, ...). We reload and store into a temporary local variable
- // contiguously.
- assert(AI.isDirect() && "Homogeneous aggregates should be passed directly");
- llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
- llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
- llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy);
-
- for (unsigned i = 0; i < NumMembers; ++i) {
- llvm::Value *BaseOffset = llvm::ConstantInt::get(CGF.Int32Ty, 16 * i);
- llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset);
- LoadAddr = CGF.Builder.CreateBitCast(LoadAddr,
- llvm::PointerType::getUnqual(BaseTy));
- llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i);
-
- llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
- CGF.Builder.CreateStore(Elem, StoreAddr);
- }
-
- RegAddr = CGF.Builder.CreateBitCast(Tmp, MemTy);
- } else {
- // Otherwise the object is contiguous in memory
- RegAddr = CGF.Builder.CreateBitCast(BaseAddr, MemTy);
- }
-
- CGF.EmitBranch(ContBlock);
-
- //=======================================
- // Argument was on the stack
- //=======================================
- CGF.EmitBlock(OnStackBlock);
-
- llvm::Value *stack_p = 0, *OnStackAddr = 0;
- stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
- OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack");
-
- // Again, stack arguments may need realigmnent. In this case both integer and
- // floating-point ones might be affected.
- if (AI.isDirect() && getContext().getTypeAlign(Ty) > 64) {
- int Align = getContext().getTypeAlign(Ty) / 8;
-
- OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty);
-
- OnStackAddr = CGF.Builder.CreateAdd(OnStackAddr,
- llvm::ConstantInt::get(CGF.Int64Ty, Align - 1),
- "align_stack");
- OnStackAddr = CGF.Builder.CreateAnd(OnStackAddr,
- llvm::ConstantInt::get(CGF.Int64Ty, -Align),
- "align_stack");
-
- OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy);
- }
-
- uint64_t StackSize;
- if (AI.isDirect())
- StackSize = getContext().getTypeSize(Ty) / 8;
- else
- StackSize = 8;
-
- // All stack slots are 8 bytes
- StackSize = llvm::RoundUpToAlignment(StackSize, 8);
-
- llvm::Value *StackSizeC = llvm::ConstantInt::get(CGF.Int32Ty, StackSize);
- llvm::Value *NewStack = CGF.Builder.CreateGEP(OnStackAddr, StackSizeC,
- "new_stack");
-
- // Write the new value of __stack for the next call to va_arg
- CGF.Builder.CreateStore(NewStack, stack_p);
-
- OnStackAddr = CGF.Builder.CreateBitCast(OnStackAddr, MemTy);
-
- CGF.EmitBranch(ContBlock);
-
- //=======================================
- // Tidy up
- //=======================================
- CGF.EmitBlock(ContBlock);
-
- llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(MemTy, 2, "vaarg.addr");
- ResAddr->addIncoming(RegAddr, InRegBlock);
- ResAddr->addIncoming(OnStackAddr, OnStackBlock);
-
- if (AI.isDirect())
- return ResAddr;
-
- return CGF.Builder.CreateLoad(ResAddr, "vaarg.addr");
+ return EmitAArch64VAArg(VAListAddr, Ty, 8 - FreeIntRegs, 8 - FreeVFPRegs,
+ AI.isIndirect(), CGF);
}
//===----------------------------------------------------------------------===//
@@ -4175,18 +4748,18 @@
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
- virtual void computeInfo(CGFunctionInfo &FI) const;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CFG) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CFG) const override;
};
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {}
-
- virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const;
+
+ void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &M) const override;
private:
static void addKernelMetadata(llvm::Function *F);
};
@@ -4218,9 +4791,8 @@
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it)
- it->info = classifyArgumentType(it->type);
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
// Always honor user-specified calling convention.
if (FI.getCallingConvention() != llvm::CallingConv::C)
@@ -4259,7 +4831,7 @@
// CUDA __global__ functions get a kernel metadata entry. Since
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
- if (FD->getAttr<CUDAGlobalAttr>())
+ if (FD->hasAttr<CUDAGlobalAttr>())
addKernelMetadata(F);
}
}
@@ -4300,15 +4872,14 @@
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType ArgTy) const;
- virtual void computeInfo(CGFunctionInfo &FI) const {
+ void computeInfo(CGFunctionInfo &FI) const override {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it)
- it->info = classifyArgumentType(it->type);
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
}
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4360,9 +4931,8 @@
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
- for (CXXRecordDecl::base_class_const_iterator I = CXXRD->bases_begin(),
- E = CXXRD->bases_end(); I != E; ++I) {
- QualType Base = I->getType();
+ for (const auto &I : CXXRD->bases()) {
+ QualType Base = I.getType();
// Empty bases don't affect things either way.
if (isEmptyRecord(getContext(), Base, true))
@@ -4376,10 +4946,7 @@
}
// Check the fields.
- for (RecordDecl::field_iterator I = RD->field_begin(),
- E = RD->field_end(); I != E; ++I) {
- const FieldDecl *FD = *I;
-
+ for (const auto *FD : RD->fields()) {
// Empty bitfields don't affect things either way.
// Unlike isSingleElementStruct(), empty structure and array fields
// do count. So do anonymous bitfields that aren't zero-sized.
@@ -4452,7 +5019,7 @@
llvm::Type *IndexTy = RegCount->getType();
llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs);
llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV,
- "fits_in_regs");
+ "fits_in_regs");
llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
@@ -4535,15 +5102,22 @@
return true;
switch (Triple.getOS()) {
- case llvm::Triple::Cygwin:
- case llvm::Triple::MinGW32:
case llvm::Triple::AuroraUX:
case llvm::Triple::DragonFly:
case llvm::Triple::FreeBSD:
case llvm::Triple::OpenBSD:
case llvm::Triple::Bitrig:
- case llvm::Triple::Win32:
return true;
+ case llvm::Triple::Win32:
+ switch (Triple.getEnvironment()) {
+ case llvm::Triple::UnknownEnvironment:
+ case llvm::Triple::Cygnus:
+ case llvm::Triple::GNU:
+ case llvm::Triple::MSVC:
+ return true;
+ default:
+ return false;
+ }
default:
return false;
}
@@ -4611,7 +5185,7 @@
MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const;
+ CodeGen::CodeGenModule &M) const override;
};
}
@@ -4660,9 +5234,9 @@
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const;
- virtual void computeInfo(CGFunctionInfo &FI) const;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
class MIPSTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4672,12 +5246,12 @@
: TargetCodeGenInfo(new MipsABIInfo(CGT, IsO32)),
SizeOfUnwindException(IsO32 ? 24 : 32) {}
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 29;
}
void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
+ CodeGen::CodeGenModule &CGM) const override {
const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
if (!FD) return;
llvm::Function *Fn = cast<llvm::Function>(GV);
@@ -4690,9 +5264,9 @@
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const;
+ llvm::Value *Address) const override;
- unsigned getSizeOfUnwindException() const {
+ unsigned getSizeOfUnwindException() const override {
return SizeOfUnwindException;
}
};
@@ -4901,9 +5475,8 @@
// Check if a pointer to an aggregate is passed as a hidden argument.
uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0;
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it)
- it->info = classifyArgumentType(it->type, Offset);
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type, Offset);
}
llvm::Value* MipsABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -4984,8 +5557,8 @@
TCETargetCodeGenInfo(CodeGenTypes &CGT)
: DefaultTargetCodeGenInfo(CGT) {}
- virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const;
+ void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &M) const override;
};
void TCETargetCodeGenInfo::SetTargetAttributes(const Decl *D,
@@ -5000,9 +5573,8 @@
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL C Kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
-
- if (FD->hasAttr<ReqdWorkGroupSizeAttr>()) {
-
+ const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>();
+ if (Attr) {
// Convert the reqd_work_group_size() attributes to metadata.
llvm::LLVMContext &Context = F->getContext();
llvm::NamedMDNode *OpenCLMetadata =
@@ -5012,14 +5584,11 @@
Operands.push_back(F);
Operands.push_back(llvm::Constant::getIntegerValue(M.Int32Ty,
- llvm::APInt(32,
- FD->getAttr<ReqdWorkGroupSizeAttr>()->getXDim())));
+ llvm::APInt(32, Attr->getXDim())));
Operands.push_back(llvm::Constant::getIntegerValue(M.Int32Ty,
- llvm::APInt(32,
- FD->getAttr<ReqdWorkGroupSizeAttr>()->getYDim())));
+ llvm::APInt(32, Attr->getYDim())));
Operands.push_back(llvm::Constant::getIntegerValue(M.Int32Ty,
- llvm::APInt(32,
- FD->getAttr<ReqdWorkGroupSizeAttr>()->getZDim())));
+ llvm::APInt(32, Attr->getZDim())));
// Add a boolean constant operand for "required" (true) or "hint" (false)
// for implementing the work_group_size_hint attr later. Currently
@@ -5050,10 +5619,10 @@
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
- virtual void computeInfo(CGFunctionInfo &FI) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
class HexagonTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -5061,7 +5630,7 @@
HexagonTargetCodeGenInfo(CodeGenTypes &CGT)
:TargetCodeGenInfo(new HexagonABIInfo(CGT)) {}
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 29;
}
};
@@ -5070,9 +5639,8 @@
void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it)
- it->info = classifyArgumentType(it->type);
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
}
ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const {
@@ -5205,9 +5773,9 @@
private:
ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const;
- virtual void computeInfo(CGFunctionInfo &FI) const;
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
// Coercion type builder for structs passed in registers. The coercion type
// serves two purposes:
@@ -5346,6 +5914,11 @@
if (!isAggregateTypeForABI(Ty))
return ABIArgInfo::getDirect();
+ // If a C++ object has either a non-trivial copy constructor or a non-trivial
+ // destructor, it is passed with an explicit indirect pointer / sret pointer.
+ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+ return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
+
// This is a small aggregate type that should be passed in registers.
// Build a coercion type from the LLVM struct type.
llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty));
@@ -5382,6 +5955,7 @@
switch (AI.getKind()) {
case ABIArgInfo::Expand:
+ case ABIArgInfo::InAlloca:
llvm_unreachable("Unsupported ABI kind for va_arg");
case ABIArgInfo::Extend:
@@ -5417,9 +5991,8 @@
void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8);
- for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
- it != ie; ++it)
- it->info = classifyType(it->type, 16 * 8);
+ for (auto &I : FI.arguments())
+ I.info = classifyType(I.type, 16 * 8);
}
namespace {
@@ -5427,24 +6000,65 @@
public:
SparcV9TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new SparcV9ABIInfo(CGT)) {}
+
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
+ return 14;
+ }
+
+ bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *Address) const override;
};
} // end anonymous namespace
+bool
+SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *Address) const {
+ // This is calculated from the LLVM and GCC tables and verified
+ // against gcc output. AFAIK all ABIs use the same encoding.
+
+ CodeGen::CGBuilderTy &Builder = CGF.Builder;
+
+ llvm::IntegerType *i8 = CGF.Int8Ty;
+ llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
+ llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
+
+ // 0-31: the 8-byte general-purpose registers
+ AssignToArrayRange(Builder, Address, Eight8, 0, 31);
+
+ // 32-63: f0-31, the 4-byte floating-point registers
+ AssignToArrayRange(Builder, Address, Four8, 32, 63);
+
+ // Y = 64
+ // PSR = 65
+ // WIM = 66
+ // TBR = 67
+ // PC = 68
+ // NPC = 69
+ // FSR = 70
+ // CSR = 71
+ AssignToArrayRange(Builder, Address, Eight8, 64, 71);
+
+ // 72-87: d0-15, the 8-byte floating-point registers
+ AssignToArrayRange(Builder, Address, Eight8, 72, 87);
+
+ return false;
+}
+
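
Conceptually, the two AssignToArrayRange calls above fill the DWARF register-size table consumed by the EH builtins (e.g. __builtin_init_dwarf_reg_size_table). An illustrative-only equivalent, with sizes in bytes:

    static void fillSparcV9RegSizes(unsigned char table[88]) {
      for (unsigned i = 0;  i <= 31; ++i) table[i] = 8;  // %g0-%i7
      for (unsigned i = 32; i <= 63; ++i) table[i] = 4;  // %f0-%f31 (single precision)
      for (unsigned i = 64; i <= 71; ++i) table[i] = 8;  // Y, PSR, WIM, TBR, PC, NPC, FSR, CSR
      for (unsigned i = 72; i <= 87; ++i) table[i] = 8;  // %d0-%d15 aliases
    }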
//===----------------------------------------------------------------------===//
-// Xcore ABI Implementation
+// XCore ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class XCoreABIInfo : public DefaultABIInfo {
public:
XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
- virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const override;
};
-class XcoreTargetCodeGenInfo : public TargetCodeGenInfo {
+class XCoreTargetCodeGenInfo : public TargetCodeGenInfo {
public:
- XcoreTargetCodeGenInfo(CodeGenTypes &CGT)
+ XCoreTargetCodeGenInfo(CodeGenTypes &CGT)
:TargetCodeGenInfo(new XCoreABIInfo(CGT)) {}
};
} // End anonymous namespace.
@@ -5468,6 +6082,7 @@
uint64_t ArgSize = 0;
switch (AI.getKind()) {
case ABIArgInfo::Expand:
+ case ABIArgInfo::InAlloca:
llvm_unreachable("Unsupported ABI kind for va_arg");
case ABIArgInfo::Ignore:
Val = llvm::UndefValue::get(ArgPtrTy);
@@ -5520,11 +6135,22 @@
case llvm::Triple::mips64el:
return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false));
+ case llvm::Triple::arm64: {
+ ARM64ABIInfo::ABIKind Kind = ARM64ABIInfo::AAPCS;
+ if (strcmp(getTarget().getABI(), "darwinpcs") == 0)
+ Kind = ARM64ABIInfo::DarwinPCS;
+
+ return *(TheTargetCodeGenInfo = new ARM64TargetCodeGenInfo(Types, Kind));
+ }
+
case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types));
case llvm::Triple::arm:
+ case llvm::Triple::armeb:
case llvm::Triple::thumb:
+ case llvm::Triple::thumbeb:
{
ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS;
if (strcmp(getTarget().getABI(), "apcs-gnu") == 0)
@@ -5572,7 +6198,7 @@
bool IsDarwinVectorABI = Triple.isOSDarwin();
bool IsSmallStructInRegABI =
X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
- bool IsWin32FloatStructABI = (Triple.getOS() == llvm::Triple::Win32);
+ bool IsWin32FloatStructABI = Triple.isWindowsMSVCEnvironment();
if (Triple.getOS() == llvm::Triple::Win32) {
return *(TheTargetCodeGenInfo =
@@ -5610,7 +6236,6 @@
case llvm::Triple::sparcv9:
return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types));
case llvm::Triple::xcore:
- return *(TheTargetCodeGenInfo = new XcoreTargetCodeGenInfo(Types));
-
+ return *(TheTargetCodeGenInfo = new XCoreTargetCodeGenInfo(Types));
}
}
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index f631f31..6c3ab64 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -17,8 +17,8 @@
#include "clang/AST/Type.h"
#include "clang/Basic/LLVM.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
class Constant;
@@ -123,6 +123,10 @@
return Ty;
}
+ /// doesReturnSlotInterfereWithArgs - Return true if the target uses an
+ /// argument slot for an 'sret' type.
+ virtual bool doesReturnSlotInterfereWithArgs() const { return true; }
+
/// Retrieve the address of a function to call immediately before
/// calling objc_retainAutoreleasedReturnValue. The
/// implementation of objc_autoreleaseReturnValue sniffs the
@@ -176,7 +180,7 @@
/// However, some platforms make the conventions identical except
/// for passing additional out-of-band information to a variadic
/// function: for example, x86-64 passes the number of SSE
- /// arguments in %al. On these platforms, it is desireable to
+ /// arguments in %al. On these platforms, it is desirable to
/// call unprototyped functions using the variadic convention so
/// that unprototyped calls to varargs functions still succeed.
///