Optimize HLSL zero initializer
Currently we initialize a variable using an explicit zero initializer. Take
the variable below as an example:
uint var[4];
We translate it to:
uint var[4] = { 0, 0, 0, 0};
If the array size is large, we have to use a very long zero initializer,
and such a long initializer is very slow for D3D drivers to compile.
This CL uses the 'static' trick below to solve the problem:
static uint _ANGLE_ZEROS_[256];
...
uint var[516] = {_ANGLE_ZEROS_, _ANGLE_ZEROS_, 0, 0, 0, 0};
For 'static', if the declaration does not include an initializer, the
value is set to zero.
https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx-graphics-hlsl-variable-syntax
Bug: chromium:898030
Change-Id: Ia3f6574b5ddaffa94bf971140eba95835ee105ee
Reviewed-on: https://chromium-review.googlesource.com/c/1332805
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Jie A Chen <jie.a.chen@intel.com>
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index 05d892f..9bae425 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -116,6 +116,44 @@
IsAtomicFunction(node.getRight()->getAsAggregate()->getOp());
}
+// Name of the HLSL static array that zero initializers reference in place of long runs of "0".
+// Declared as a constexpr array so the name is a true compile-time constant rather than a
+// reassignable global pointer.
+constexpr char kZeros[] = "_ANGLE_ZEROS_";
+// Number of elements in the array named by kZeros; one reference stands in for this many zeros.
+constexpr int kZeroCount = 256;
+// Returns the HLSL source line that declares the zero array named by kZeros.
+std::string DefineZeroArray()
+{
+    // No initializer is written on purpose: HLSL sets a 'static' variable that is declared
+    // without one to zero. See
+    // https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx-graphics-hlsl-variable-syntax
+    std::stringstream declaration;
+    declaration << "static uint ";
+    declaration << kZeros << "[" << kZeroCount << "];\n";
+    return declaration.str();
+}
+
+// Builds the comma-separated element list of a zero initializer covering |size| components:
+// one kZeros reference per full block of kZeroCount components, followed by one literal "0" for
+// each leftover component. Returns an empty string when |size| is 0.
+std::string GetZeroInitializer(size_t size)
+{
+    const size_t fullBlocks = size / kZeroCount;
+    const size_t remainder  = size % kZeroCount;
+
+    std::stringstream list;
+    // Empty before the first element is written, ", " before every later one.
+    const char *separator = "";
+
+    // Each reference to the zero array stands in for kZeroCount zeros.
+    for (size_t block = 0; block < fullBlocks; ++block)
+    {
+        list << separator << kZeros;
+        separator = ", ";
+    }
+
+    // Components that do not fill a whole block are written out one by one.
+    for (size_t component = 0; component < remainder; ++component)
+    {
+        list << separator << "0";
+        separator = ", ";
+    }
+
+    return list.str();
+}
+
} // anonymous namespace
TReferencedBlock::TReferencedBlock(const TInterfaceBlock *aBlock,
@@ -225,6 +263,7 @@
mUsesDiscardRewriting = false;
mUsesNestedBreak = false;
mRequiresIEEEStrictCompiling = false;
+ mUseZeroArray = false;
mUniqueIndex = 0;
@@ -553,6 +592,11 @@
// https://github.com/KhronosGroup/OpenGL-API/issues/5
out << "\n#define ATOMIC_COUNTER_ARRAY_STRIDE 4\n\n";
+ if (mUseZeroArray)
+ {
+ out << DefineZeroArray() << "\n";
+ }
+
if (mShaderType == GL_FRAGMENT_SHADER)
{
const bool usingMRTExtension =
@@ -2028,9 +2072,6 @@
{
symbol->traverse(this);
out << ArrayString(symbol->getType());
- // Add initializer only when requested. It is very slow for D3D11 drivers to
- // compile a compute shader if we add code to initialize a groupshared array
- // variable with a large array size.
if (declarator->getQualifier() != EvqShared ||
mCompileOptions & SH_INIT_SHARED_VARIABLES)
{
@@ -2965,20 +3006,16 @@
}
}
-TString OutputHLSL::zeroInitializer(const TType &type)
+TString OutputHLSL::zeroInitializer(const TType &type) const  // Returns a "{...}" zero initializer sized by type.getObjectSize().
{
TString string;
size_t size = type.getObjectSize();
- for (size_t component = 0; component < size; component++)
+ if (size >= kZeroCount)  // The initializer will contain at least one reference to the zero array.
{
- string += "0";
-
- if (component + 1 < size)
- {
- string += ", ";
- }
+ mUseZeroArray = true;  // Requests the zero array declaration in the header. NOTE(review): written from a const method, so the member presumably is mutable -- confirm in the class declaration.
}
+ string = GetZeroInitializer(size).c_str();  // Comma-separated element list; the braces are added below.
return "{" + string + "}";
}