[AMDGPU] add LDS f32 intrinsics
added llvm.amdgcn.atomic.{add|min|max}.f32 intrinsics
to allow generate ds_{add|min|max}[_rtn]_f32 instructions
needed for OpenCL float atomics in LDS
Reviewed by: arsenm
Differential Revision: https://reviews.llvm.org/D37985
llvm-svn: 322656
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 7d66c0f..e4591649 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -260,7 +260,10 @@
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:{
+ case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_atomic_fadd:
+ case Intrinsic::amdgcn_atomic_fmin:
+ case Intrinsic::amdgcn_atomic_fmax: {
const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
if (!IsVolatile || !IsVolatile->isZero())
return false;
@@ -289,6 +292,9 @@
case Intrinsic::objectsize:
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_atomic_fadd:
+ case Intrinsic::amdgcn_atomic_fmin:
+ case Intrinsic::amdgcn_atomic_fmax:
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;