AMDGPU/R600: Expand unaligned writes to local and global AS
This covers the LOCAL and GLOBAL address spaces only; the PRIVATE
address space needs special treatment and is handled separately.
Differential Revision: https://reviews.llvm.org/D23971
llvm-svn: 280526
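
For context, a minimal standalone sketch of what the expansion does
conceptually (plain C++, not LLVM code; the helper name is illustrative):
an unaligned 32-bit store is split into naturally aligned byte stores, so
no single access is wider than its alignment. Little-endian layout is
assumed here.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Hypothetical helper: store Value to Dst one byte at a time
    // (little-endian), so each access is naturally aligned.
    static void storeUnalignedU32(uint8_t *Dst, uint32_t Value) {
      for (int I = 0; I < 4; ++I)
        Dst[I] = static_cast<uint8_t>(Value >> (8 * I));
    }

    int main() {
      uint8_t Buf[8] = {};
      storeUnalignedU32(Buf + 1, 0xAABBCCDDu); // misaligned destination
      uint32_t Out;
      std::memcpy(&Out, Buf + 1, sizeof(Out));
      std::printf("0x%08X\n", (unsigned)Out);  // prints 0xAABBCCDD
      return 0;
    }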
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 5e525fb..8c252e8 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1120,26 +1120,36 @@
unsigned AS = StoreNode->getAddressSpace();
SDValue Value = StoreNode->getValue();
EVT ValueVT = Value.getValueType();
+ EVT MemVT = StoreNode->getMemoryVT();
+ unsigned Align = StoreNode->getAlignment();
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
ValueVT.isVector()) {
return SplitVectorStore(Op, DAG);
}
+ // Private AS needs special fixes and is handled separately below.
+ if (Align < MemVT.getStoreSize() && (AS != AMDGPUAS::PRIVATE_ADDRESS) &&
+ !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
+ return expandUnalignedStore(StoreNode, DAG);
+ }
+
SDLoc DL(Op);
SDValue Chain = StoreNode->getChain();
SDValue Ptr = StoreNode->getBasePtr();
if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
+ // It is beneficial to create the MSKOR here rather than in the combiner
+ // to avoid artificial dependencies introduced by the RMW sequence.
if (StoreNode->isTruncatingStore()) {
EVT VT = Value.getValueType();
assert(VT.bitsLE(MVT::i32));
- EVT MemVT = StoreNode->getMemoryVT();
SDValue MaskConstant;
if (MemVT == MVT::i8) {
MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
} else {
assert(MemVT == MVT::i16);
+ assert(StoreNode->getAlignment() >= 2);
MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
}
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
@@ -1183,7 +1193,6 @@
if (AS != AMDGPUAS::PRIVATE_ADDRESS)
return SDValue();
- EVT MemVT = StoreNode->getMemoryVT();
if (MemVT.bitsLT(MVT::i32))
return lowerPrivateTruncStore(StoreNode, DAG);
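
For reference, the read-modify-write that the MSKOR node encodes can be
pictured as below: a minimal sketch in plain C++, assuming a 4-byte-aligned
base and little-endian layout. The names mskorStore and storeI16 are
illustrative, not R600 or LLVM identifiers. The mask (0xFF or 0xFFFF) and
the shift derived from the low pointer bits mirror the MaskConstant and
DWordAddr logic above; the alignment assert in the patch guarantees an i16
never straddles a dword.

    #include <cstdint>

    // Masked dword write: keep the bits outside Mask, replace the bits
    // inside it with Value (both already shifted into byte position).
    static void mskorStore(uint32_t *DWordAddr, uint32_t Mask,
                           uint32_t Value) {
      *DWordAddr = (*DWordAddr & ~Mask) | (Value & Mask);
    }

    // Truncating i16 store at a 2-byte-aligned byte offset; Base is
    // assumed to be 4-byte aligned so the dword access is legal.
    static void storeI16(uint8_t *Base, uint32_t ByteOffset, uint16_t V) {
      uint32_t *DWord =
          reinterpret_cast<uint32_t *>(Base + (ByteOffset & ~3u));
      uint32_t Shift = (ByteOffset & 3u) * 8; // bit position in the dword
      mskorStore(DWord, 0xFFFFu << Shift, uint32_t(V) << Shift);
    }

    int main() {
      alignas(4) uint8_t Mem[8] = {};
      storeI16(Mem, 2, 0xBEEF); // updates only bytes 2-3 of the dword
      return 0;
    }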