[RISCV] Add codegen support for RV64A
In order to support codegen for RV64A, this patch:
* Introduces masked atomic intrinsics for atomicrmw operations and cmpxchg
that use the i64 type. These are ultimately lowered to masked operations
using lr.w/sc.w, but the alternate intrinsics are needed on RV64 because
i32 is not a legal type there
* Modifies RISCVExpandPseudoInsts.cpp to handle PseudoAtomicLoadNand64 and
PseudoCmpXchg64
* Modifies the AtomicExpandPass hooks in RISCVTargetLowering to sext/trunc as
needed for RV64 and to select the i64 intrinsic IDs when necessary
* Adds appropriate patterns to RISCVInstrInfoA.td
* Updates test/CodeGen/RISCV/atomic-*.ll to show RV64A support
This ends up being a fairly mechanical change, as the logic for RV32A is
effectively reused.
Differential Revision: https://reviews.llvm.org/D53233
llvm-svn: 351422
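
For illustration only (not part of the patch), a hedged C++ sketch of
source-level operations whose RV64 lowering exercises each piece of this
change; the function names are made up, and exact instruction selection
depends on target flags:

    #include <atomic>

    long rmw_add(std::atomic<long> &a, long v) {
      return a.fetch_add(v); // amoadd.d via the new AMOPat entries
    }

    long rmw_nand(long *p, long v) {
      // No AMO encodes nand, so this expands to the lr.d/sc.d loop behind
      // PseudoAtomicLoadNand64.
      return __atomic_fetch_nand(p, v, __ATOMIC_SEQ_CST);
    }

    bool rmw_cas(std::atomic<long> &a, long &expected, long desired) {
      return a.compare_exchange_strong(expected, desired); // PseudoCmpXchg64
    }

    short rmw_subword(std::atomic<short> &a, short v) {
      // Sub-word atomics route through the new masked i64 intrinsics on RV64.
      return a.exchange(v);
    }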
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 35c185a..55275de 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -87,6 +87,9 @@
case RISCV::PseudoAtomicLoadNand32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
NextMBBI);
+ case RISCV::PseudoAtomicLoadNand64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
+ NextMBBI);
case RISCV::PseudoMaskedAtomicSwap32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
NextMBBI);
@@ -111,6 +114,8 @@
NextMBBI);
case RISCV::PseudoCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
+ case RISCV::PseudoCmpXchg64:
+ return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
case RISCV::PseudoMaskedCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
}
@@ -152,12 +157,61 @@
}
}
+static unsigned getLRForRMW64(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::LR_D;
+ case AtomicOrdering::Acquire:
+ return RISCV::LR_D_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::LR_D;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::LR_D_AQ;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::LR_D_AQ_RL;
+ }
+}
+
+static unsigned getSCForRMW64(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::SC_D;
+ case AtomicOrdering::Acquire:
+ return RISCV::SC_D;
+ case AtomicOrdering::Release:
+ return RISCV::SC_D_RL;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::SC_D_RL;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::SC_D_AQ_RL;
+ }
+}
+
+static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
+ if (Width == 32)
+ return getLRForRMW32(Ordering);
+ if (Width == 64)
+ return getLRForRMW64(Ordering);
+ llvm_unreachable("Unexpected LR width\n");
+}
+
+static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
+ if (Width == 32)
+ return getSCForRMW32(Ordering);
+ if (Width == 64)
+ return getSCForRMW64(Ordering);
+ llvm_unreachable("Unexpected SC width\n");
+}
+
static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
DebugLoc DL, MachineBasicBlock *ThisMBB,
MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB,
AtomicRMWInst::BinOp BinOp, int Width) {
- assert(Width == 32 && "RV64 atomic expansion currently unsupported");
unsigned DestReg = MI.getOperand(0).getReg();
unsigned ScratchReg = MI.getOperand(1).getReg();
unsigned AddrReg = MI.getOperand(2).getReg();
@@ -166,11 +220,11 @@
static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
// .loop:
- // lr.w dest, (addr)
+ // lr.[w|d] dest, (addr)
// binop scratch, dest, val
- // sc.w scratch, scratch, (addr)
+ // sc.[w|d] scratch, scratch, (addr)
// bnez scratch, loop
- BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
switch (BinOp) {
default:
@@ -184,7 +238,7 @@
.addImm(-1);
break;
}
- BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
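
A hedged C++ model of the loop this builds for 64-bit nand (the lr.d/sc.d
pair is approximated by a weak compare-exchange; the real expansion uses the
instructions directly):

    #include <cstdint>

    uint64_t atomicNandModel(uint64_t *Addr, uint64_t Incr) {
      uint64_t Dest = __atomic_load_n(Addr, __ATOMIC_RELAXED); // lr.d
      uint64_t Scratch;
      do {
        Scratch = ~(Dest & Incr); // and + xori -1, as emitted above
      } while (!__atomic_compare_exchange_n(Addr, &Dest, Scratch,
                                            /*weak=*/true, __ATOMIC_SEQ_CST,
                                            __ATOMIC_RELAXED)); // sc.d + bnez
      return Dest; // old value
    }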
@@ -219,7 +273,7 @@
const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
- assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
unsigned DestReg = MI.getOperand(0).getReg();
unsigned ScratchReg = MI.getOperand(1).getReg();
unsigned AddrReg = MI.getOperand(2).getReg();
@@ -333,7 +387,7 @@
MachineBasicBlock::iterator &NextMBBI) {
assert(IsMasked == true &&
"Should only need to expand masked atomic max/min");
- assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
@@ -451,7 +505,6 @@
bool RISCVExpandPseudo::expandAtomicCmpXchg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
int Width, MachineBasicBlock::iterator &NextMBBI) {
- assert(Width == 32 && "RV64 atomic expansion currently unsupported");
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB.getParent();
@@ -483,18 +536,18 @@
if (!IsMasked) {
// .loophead:
- // lr.w dest, (addr)
+ // lr.[w|d] dest, (addr)
// bne dest, cmpval, done
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
.addReg(DestReg)
.addReg(CmpValReg)
.addMBB(DoneMBB);
// .looptail:
- // sc.w scratch, newval, (addr)
+ // sc.[w|d] scratch, newval, (addr)
// bnez scratch, loophead
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
.addReg(AddrReg)
.addReg(NewValReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -507,7 +560,7 @@
// and scratch, dest, mask
// bne scratch, cmpval, done
unsigned MaskReg = MI.getOperand(5).getReg();
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
@@ -525,7 +578,7 @@
// bnez scratch, loophead
insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
MaskReg, ScratchReg);
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
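
For reference, a hedged control-flow model of the unmasked PseudoCmpXchg64
expansion; loadReserved and storeConditionalFailed are hypothetical stand-ins
for lr.d and sc.d, which have no portable C++ equivalent:

    #include <cstdint>

    uint64_t loadReserved(uint64_t *Addr);                   // lr.d (hypothetical)
    bool storeConditionalFailed(uint64_t *Addr, uint64_t V); // sc.d (hypothetical)

    uint64_t cmpXchg64Model(uint64_t *Addr, uint64_t CmpVal, uint64_t NewVal) {
      uint64_t Dest;
      do {
        Dest = loadReserved(Addr); // .loophead: lr.[w|d] dest, (addr)
        if (Dest != CmpVal)
          break;                   //            bne dest, cmpval, done
      } while (storeConditionalFailed(Addr, NewVal)); // .looptail: sc.[w|d] + bnez
      return Dest;                 // .done
    }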
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 508dcbd..5f55cea 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1728,37 +1728,74 @@
}
static Intrinsic::ID
-getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
- switch (BinOp) {
- default:
- llvm_unreachable("Unexpected AtomicRMW BinOp");
- case AtomicRMWInst::Xchg:
- return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
- case AtomicRMWInst::Add:
- return Intrinsic::riscv_masked_atomicrmw_add_i32;
- case AtomicRMWInst::Sub:
- return Intrinsic::riscv_masked_atomicrmw_sub_i32;
- case AtomicRMWInst::Nand:
- return Intrinsic::riscv_masked_atomicrmw_nand_i32;
- case AtomicRMWInst::Max:
- return Intrinsic::riscv_masked_atomicrmw_max_i32;
- case AtomicRMWInst::Min:
- return Intrinsic::riscv_masked_atomicrmw_min_i32;
- case AtomicRMWInst::UMax:
- return Intrinsic::riscv_masked_atomicrmw_umax_i32;
- case AtomicRMWInst::UMin:
- return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
+ if (XLen == 32) {
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
+ case AtomicRMWInst::Add:
+ return Intrinsic::riscv_masked_atomicrmw_add_i32;
+ case AtomicRMWInst::Sub:
+ return Intrinsic::riscv_masked_atomicrmw_sub_i32;
+ case AtomicRMWInst::Nand:
+ return Intrinsic::riscv_masked_atomicrmw_nand_i32;
+ case AtomicRMWInst::Max:
+ return Intrinsic::riscv_masked_atomicrmw_max_i32;
+ case AtomicRMWInst::Min:
+ return Intrinsic::riscv_masked_atomicrmw_min_i32;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::riscv_masked_atomicrmw_umax_i32;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+ }
}
+
+ if (XLen == 64) {
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
+ case AtomicRMWInst::Add:
+ return Intrinsic::riscv_masked_atomicrmw_add_i64;
+ case AtomicRMWInst::Sub:
+ return Intrinsic::riscv_masked_atomicrmw_sub_i64;
+ case AtomicRMWInst::Nand:
+ return Intrinsic::riscv_masked_atomicrmw_nand_i64;
+ case AtomicRMWInst::Max:
+ return Intrinsic::riscv_masked_atomicrmw_max_i64;
+ case AtomicRMWInst::Min:
+ return Intrinsic::riscv_masked_atomicrmw_min_i64;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::riscv_masked_atomicrmw_umax_i64;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::riscv_masked_atomicrmw_umin_i64;
+ }
+ }
+
+ llvm_unreachable("Unexpected XLen\n");
}
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
- Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
+ unsigned XLen = Subtarget.getXLen();
+ Value *Ordering =
+ Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
Type *Tys[] = {AlignedAddr->getType()};
Function *LrwOpScwLoop = Intrinsic::getDeclaration(
AI->getModule(),
- getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
+ getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
+
+ if (XLen == 64) {
+ Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
+ Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+ ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
+ }
+
+ Value *Result;
// Must pass the shift amount needed to sign extend the loaded value prior
// to performing a signed comparison for min/max. ShiftAmt is the number of
@@ -1770,13 +1807,18 @@
const DataLayout &DL = AI->getModule()->getDataLayout();
unsigned ValWidth =
DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
- Value *SextShamt = Builder.CreateSub(
- Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
- return Builder.CreateCall(LrwOpScwLoop,
- {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ Value *SextShamt =
+ Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
+ Result = Builder.CreateCall(LrwOpScwLoop,
+ {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ } else {
+ Result =
+ Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
}
- return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+ if (XLen == 64)
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ return Result;
}
TargetLowering::AtomicExpansionKind
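
A hedged worked example of the SextShamt arithmetic above (assuming the
expansion consumes it as a shift-left/arithmetic-shift-right pair): for
signed min/max, the loaded field must be sign-extended in place before the
comparison.

    #include <cstdint>

    // Sign-extend a ValWidth-bit field at bit position ShiftAmt of a 64-bit
    // word, in place. SextShamt = XLen - ValWidth - ShiftAmt; for an i16 at
    // byte offset 2 on RV64: 64 - 16 - 16 = 32.
    int64_t sextInPlace(int64_t Word, unsigned ShiftAmt, unsigned ValWidth) {
      unsigned SextShamt = 64 - ValWidth - ShiftAmt;
      return (int64_t)((uint64_t)Word << SextShamt) >> SextShamt; // sll, sra
    }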
@@ -1791,10 +1833,21 @@
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
- Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
+ unsigned XLen = Subtarget.getXLen();
+ Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
+ Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
+ if (XLen == 64) {
+ CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
+ NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
+ Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+ CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
+ }
Type *Tys[] = {AlignedAddr->getType()};
- Function *MaskedCmpXchg = Intrinsic::getDeclaration(
- CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys);
- return Builder.CreateCall(MaskedCmpXchg,
- {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+ Function *MaskedCmpXchg =
+ Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
+ Value *Result = Builder.CreateCall(
+ MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+ if (XLen == 64)
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ return Result;
}
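
A hedged note on the extensions above: the i64 masked intrinsics traffic in
XLen-wide values, and sext (rather than zext) matches RV64's convention of
keeping 32-bit values sign-extended in registers. A minimal model of the
value plumbing:

    #include <cstdint>

    // i32 operand -> i64 intrinsic operand (the CreateSExt calls above).
    int64_t widenOperand(int32_t V) { return (int64_t)V; }

    // i64 intrinsic result -> i32 result (the CreateTrunc calls above);
    // only the low 32 bits of the masked result are meaningful.
    int32_t narrowResult(int64_t R) { return (int32_t)R; }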
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 9cb1d2f..0275d32 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -235,7 +235,7 @@
class PseudoCmpXchg
: Pseudo<(outs GPR:$res, GPR:$scratch),
- (ins GPR:$addr, GPR:$cmpval, GPR:$newval, i32imm:$ordering), []> {
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, ixlenimm:$ordering), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
@@ -263,7 +263,7 @@
def PseudoMaskedCmpXchg32
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
- i32imm:$ordering), []> {
+ ixlenimm:$ordering), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
@@ -276,3 +276,79 @@
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
} // Predicates = [HasStdExtA]
+
+let Predicates = [HasStdExtA, IsRV64] in {
+
+/// 64-bit atomic loads and stores
+
+// Fences will be inserted for atomic load/stores according to the logic in
+// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+defm : LdPat<atomic_load_64, LD>;
+defm : AtomicStPat<atomic_store_64, SD, GPR>;
+
+defm : AMOPat<"atomic_swap_64", "AMOSWAP_D">;
+defm : AMOPat<"atomic_load_add_64", "AMOADD_D">;
+defm : AMOPat<"atomic_load_and_64", "AMOAND_D">;
+defm : AMOPat<"atomic_load_or_64", "AMOOR_D">;
+defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D">;
+defm : AMOPat<"atomic_load_max_64", "AMOMAX_D">;
+defm : AMOPat<"atomic_load_min_64", "AMOMIN_D">;
+defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D">;
+defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D">;
+
+/// 64-bit AMOs
+
+def : Pat<(atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr),
+ (AMOADD_D GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_acquire GPR:$addr, GPR:$incr),
+ (AMOADD_D_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_release GPR:$addr, GPR:$incr),
+ (AMOADD_D_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr),
+ (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr),
+ (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+
+/// 64-bit pseudo AMOs
+
+def PseudoAtomicLoadNand64 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(atomic_load_nand_64_acquire GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(atomic_load_nand_64_release GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>;
+
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64,
+ PseudoMaskedAtomicSwap32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64,
+ PseudoMaskedAtomicLoadAdd32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64,
+ PseudoMaskedAtomicLoadSub32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64,
+ PseudoMaskedAtomicLoadNand32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64,
+ PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64,
+ PseudoMaskedAtomicLoadMin32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64,
+ PseudoMaskedAtomicLoadUMax32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64,
+ PseudoMaskedAtomicLoadUMin32>;
+
+/// 64-bit compare and exchange
+
+def PseudoCmpXchg64 : PseudoCmpXchg;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>;
+
+def : Pat<(int_riscv_masked_cmpxchg_i64
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
+ (PseudoMaskedCmpXchg32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
+} // Predicates = [HasStdExtA, IsRV64]
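
Two hedged notes on the patterns above, with an illustrative C++ snippet:
64-bit atomic subtraction reuses amoadd.d with a negated operand since the A
extension has no amosub.d, and the immediates 2/4/5/6/7 in the
PseudoAtomicLoadNand64 patterns mirror the enumerator values in
llvm/Support/AtomicOrdering.h (Monotonic = 2, Acquire = 4, Release = 5,
AcquireRelease = 6, SequentiallyConsistent = 7).

    #include <atomic>

    // fetch_sub lowers via the atomic_load_sub_64 patterns above to
    // (AMOADD_D $addr, (SUB X0, $incr)), i.e. neg followed by amoadd.d.
    long rmw_sub(std::atomic<long> &a, long v) { return a.fetch_sub(v); }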