AMDGPU: Refactor treatment of denormal mode
Start moving towards treating this as a property of the calling
convention, and not the subtarget. The default denormal mode should
not be part of the subtarget, and should be moved into a separate
function attribute.
This patch is still NFC. The denormal mode remains a subtarget
feature for now, but this makes the necessary changes to switch to
using an attribute.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f8c0820..05bb392 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -676,7 +676,8 @@
FP32Denormals(true),
FP64FP16Denormals(true) {}
- SIModeRegisterDefaults(const Function &F);
+ // FIXME: Should not depend on the subtarget
+ SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
const bool IsCompute = AMDGPU::isCompute(CC);
@@ -695,10 +696,23 @@
FP64FP16Denormals == Other.FP64FP16Denormals;
}
+ /// Returns true if a mode flag does not block inlining: either the caller and
+ /// callee agree on it, or it is enabled in the callee but disabled in the
+ /// caller. The reverse mismatch (enabled in the caller, disabled in the
+ /// callee) is reported as incompatible.
+ static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
+ // The permitted one-way case is callee-enabled into caller-disabled.
+ return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
+ }
+
// FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
// be able to override.
bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
- return *this == CalleeMode;
+ if (DX10Clamp != CalleeMode.DX10Clamp)
+ return false;
+ if (IEEE != CalleeMode.IEEE)
+ return false;
+
+ // Allow inlining denormals enabled into denormals flushed functions.
+ return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
+ oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
}
};