AMDGPU: Add default denormal mode to MachineFunctionInfo
The default FP mode should really be a property of a specific
function, not of a subtarget. Introduce the necessary fields in
SIMachineFunctionInfo to help move towards this goal.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e8cf771..64739cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1151,6 +1151,8 @@
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
+ MFI->Mode.FP32Denormals = YamlMFI.Mode.FP32Denormals;
+ MFI->Mode.FP64FP16Denormals = YamlMFI.Mode.FP64FP16Denormals;
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7d70c78..0d6153d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -236,17 +236,23 @@
struct SIMode {
bool IEEE = true;
bool DX10Clamp = true;
+ bool FP32Denormals = true;
+ bool FP64FP16Denormals = true;
SIMode() = default;
-
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
IEEE = Mode.IEEE;
DX10Clamp = Mode.DX10Clamp;
+ FP32Denormals = Mode.FP32Denormals;
+ FP64FP16Denormals = Mode.FP64FP16Denormals;
}
bool operator ==(const SIMode Other) const {
- return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ return IEEE == Other.IEEE &&
+ DX10Clamp == Other.DX10Clamp &&
+ FP32Denormals == Other.FP32Denormals &&
+ FP64FP16Denormals == Other.FP64FP16Denormals;
}
};
@@ -254,6 +260,8 @@
static void mapping(IO &YamlIO, SIMode &Mode) {
YamlIO.mapOptional("ieee", Mode.IEEE, true);
YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
+ YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true);
+ YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true);
}
};
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f78dadd..f8c0820 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -659,23 +659,40 @@
/// clamp NaN to zero; otherwise, pass NaN through.
bool DX10Clamp : 1;
- // TODO: FP mode fields
+ /// If this is set, neither input nor output denormals are flushed for most
+ /// f32 instructions.
+ ///
+ /// TODO: Split into separate input and output fields, if necessary, to match
+ /// what the control bits really provide?
+ bool FP32Denormals : 1;
+
+ /// If this is set, neither input nor output denormals are flushed for both
+ /// f64 and f16/v2f16 instructions.
+ bool FP64FP16Denormals : 1;
SIModeRegisterDefaults() :
IEEE(true),
- DX10Clamp(true) {}
+ DX10Clamp(true),
+ FP32Denormals(true),
+ FP64FP16Denormals(true) {}
SIModeRegisterDefaults(const Function &F);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+ const bool IsCompute = AMDGPU::isCompute(CC);
+
SIModeRegisterDefaults Mode;
Mode.DX10Clamp = true;
- Mode.IEEE = AMDGPU::isCompute(CC);
+ Mode.IEEE = IsCompute;
+ Mode.FP32Denormals = false; // FIXME: Should be on by default.
+ Mode.FP64FP16Denormals = true;
return Mode;
}
bool operator ==(const SIModeRegisterDefaults Other) const {
- return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
+ FP32Denormals == Other.FP32Denormals &&
+ FP64FP16Denormals == Other.FP64FP16Denormals;
}
// FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should