Generate performance warnings in HLSL translation
Generate performance warnings for some code that undergoes heavy
emulation when translated to HLSL:
1. Dynamic indexing of vectors and matrices.
2. Non-empty fall-through cases in switch/case.
The warnings are generated only when code is translated to HLSL.
Generating them in the parsing stage would add too much maintenance
burden.
Improves switch statement fall-through handling in cases where an
empty fall-through case follows a non-empty one so that extra
performance warnings are not generated.
BUG=angleproject:1116
Change-Id: I7c85d78fe7c4f8e6042bda72ceaaf6e37dadfe6c
Reviewed-on: https://chromium-review.googlesource.com/732986
Commit-Queue: Olli Etuaho <oetuaho@nvidia.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
diff --git a/src/compiler/translator/Compiler.cpp b/src/compiler/translator/Compiler.cpp
index d89f3d1..b7af2d7 100644
--- a/src/compiler/translator/Compiler.cpp
+++ b/src/compiler/translator/Compiler.cpp
@@ -626,7 +626,10 @@
OutputTree(root, infoSink.info);
if (compileOptions & SH_OBJECT_CODE)
- translate(root, compileOptions);
+ {
+ PerformanceDiagnostics perfDiagnostics(&mDiagnostics);
+ translate(root, compileOptions, &perfDiagnostics);
+ }
// The IntermNode tree doesn't need to be deleted here, since the
// memory will be freed in a big chunk by the PoolAllocator.
diff --git a/src/compiler/translator/Compiler.h b/src/compiler/translator/Compiler.h
index 085b7a9..7dd5ac0 100644
--- a/src/compiler/translator/Compiler.h
+++ b/src/compiler/translator/Compiler.h
@@ -156,8 +156,10 @@
// Add emulated functions to the built-in function emulator.
virtual void initBuiltInFunctionEmulator(BuiltInFunctionEmulator *emu,
ShCompileOptions compileOptions){};
- // Translate to object code.
- virtual void translate(TIntermBlock *root, ShCompileOptions compileOptions) = 0;
+ // Translate to object code. May generate performance warnings through the diagnostics.
+ virtual void translate(TIntermBlock *root,
+ ShCompileOptions compileOptions,
+ PerformanceDiagnostics *perfDiagnostics) = 0;
// Insert statements to reference all members in unused uniform blocks with standard and shared
// layout. This is to work around a Mac driver that treats unused standard/shared
// uniform blocks as inactive.
diff --git a/src/compiler/translator/Diagnostics.cpp b/src/compiler/translator/Diagnostics.cpp
index d2fe429..1744e5a 100644
--- a/src/compiler/translator/Diagnostics.cpp
+++ b/src/compiler/translator/Diagnostics.cpp
@@ -91,4 +91,15 @@
mNumWarnings = 0;
}
+PerformanceDiagnostics::PerformanceDiagnostics(TDiagnostics *diagnostics)
+ : mDiagnostics(diagnostics)
+{
+ ASSERT(diagnostics);
+}
+
+void PerformanceDiagnostics::warning(const TSourceLoc &loc, const char *reason, const char *token)
+{
+ mDiagnostics->warning(loc, reason, token);
+}
+
} // namespace sh
diff --git a/src/compiler/translator/Diagnostics.h b/src/compiler/translator/Diagnostics.h
index 78bfb75..55b8899 100644
--- a/src/compiler/translator/Diagnostics.h
+++ b/src/compiler/translator/Diagnostics.h
@@ -50,6 +50,18 @@
int mNumWarnings;
};
+// Diagnostics wrapper to use when the code is only allowed to generate warnings.
+class PerformanceDiagnostics : public angle::NonCopyable
+{
+ public:
+ PerformanceDiagnostics(TDiagnostics *diagnostics);
+
+ void warning(const TSourceLoc &loc, const char *reason, const char *token);
+
+ private:
+ TDiagnostics *mDiagnostics;
+};
+
} // namespace sh
#endif // COMPILER_TRANSLATOR_DIAGNOSTICS_H_
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index 54427a9..13a7016 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -121,7 +121,8 @@
int numRenderTargets,
const std::vector<Uniform> &uniforms,
ShCompileOptions compileOptions,
- TSymbolTable *symbolTable)
+ TSymbolTable *symbolTable,
+ PerformanceDiagnostics *perfDiagnostics)
: TIntermTraverser(true, true, true, symbolTable),
mShaderType(shaderType),
mShaderVersion(shaderVersion),
@@ -130,7 +131,8 @@
mOutputType(outputType),
mCompileOptions(compileOptions),
mNumRenderTargets(numRenderTargets),
- mCurrentFunctionMetadata(nullptr)
+ mCurrentFunctionMetadata(nullptr),
+ mPerfDiagnostics(perfDiagnostics)
{
mInsideFunction = false;
@@ -2173,7 +2175,7 @@
ASSERT(node->getStatementList());
if (visit == PreVisit)
{
- node->setStatementList(RemoveSwitchFallThrough(node->getStatementList()));
+ node->setStatementList(RemoveSwitchFallThrough(node->getStatementList(), mPerfDiagnostics));
}
outputTriplet(out, visit, "switch (", ") ", "");
// The curly braces get written when visiting the statementList block.
diff --git a/src/compiler/translator/OutputHLSL.h b/src/compiler/translator/OutputHLSL.h
index 5509842..4e8e427 100644
--- a/src/compiler/translator/OutputHLSL.h
+++ b/src/compiler/translator/OutputHLSL.h
@@ -40,7 +40,8 @@
int numRenderTargets,
const std::vector<Uniform> &uniforms,
ShCompileOptions compileOptions,
- TSymbolTable *symbolTable);
+ TSymbolTable *symbolTable,
+ PerformanceDiagnostics *perfDiagnostics);
~OutputHLSL();
@@ -241,6 +242,8 @@
// arrays can't be return values in HLSL.
std::vector<ArrayHelperFunction> mArrayConstructIntoFunctions;
+ PerformanceDiagnostics *mPerfDiagnostics;
+
private:
TString samplerNamePrefixFromStruct(TIntermTyped *node);
bool ancestorEvaluatesToSamplerInStruct();
diff --git a/src/compiler/translator/RemoveDynamicIndexing.cpp b/src/compiler/translator/RemoveDynamicIndexing.cpp
index 5344a27..7766c1a 100644
--- a/src/compiler/translator/RemoveDynamicIndexing.cpp
+++ b/src/compiler/translator/RemoveDynamicIndexing.cpp
@@ -9,6 +9,7 @@
#include "compiler/translator/RemoveDynamicIndexing.h"
+#include "compiler/translator/Diagnostics.h"
#include "compiler/translator/InfoSink.h"
#include "compiler/translator/IntermNodePatternMatcher.h"
#include "compiler/translator/IntermNode_util.h"
@@ -281,7 +282,9 @@
class RemoveDynamicIndexingTraverser : public TLValueTrackingTraverser
{
public:
- RemoveDynamicIndexingTraverser(TSymbolTable *symbolTable, int shaderVersion);
+ RemoveDynamicIndexingTraverser(TSymbolTable *symbolTable,
+ int shaderVersion,
+ PerformanceDiagnostics *perfDiagnostics);
bool visitBinary(Visit visit, TIntermBinary *node) override;
@@ -305,13 +308,18 @@
// V[j++][i]++.
// where V is an array of vectors, j++ will only be evaluated once.
bool mRemoveIndexSideEffectsInSubtree;
+
+ PerformanceDiagnostics *mPerfDiagnostics;
};
-RemoveDynamicIndexingTraverser::RemoveDynamicIndexingTraverser(TSymbolTable *symbolTable,
- int shaderVersion)
+RemoveDynamicIndexingTraverser::RemoveDynamicIndexingTraverser(
+ TSymbolTable *symbolTable,
+ int shaderVersion,
+ PerformanceDiagnostics *perfDiagnostics)
: TLValueTrackingTraverser(true, false, false, symbolTable, shaderVersion),
mUsedTreeInsertion(false),
- mRemoveIndexSideEffectsInSubtree(false)
+ mRemoveIndexSideEffectsInSubtree(false),
+ mPerfDiagnostics(perfDiagnostics)
{
}
@@ -398,6 +406,10 @@
}
else if (IntermNodePatternMatcher::IsDynamicIndexingOfVectorOrMatrix(node))
{
+ mPerfDiagnostics->warning(node->getLine(),
+ "Performance: dynamic indexing of vectors and "
+ "matrices is emulated and can be slow.",
+ "[]");
bool write = isLValueRequiredHere();
#if defined(ANGLE_ENABLE_ASSERTS)
@@ -515,9 +527,12 @@
} // namespace
-void RemoveDynamicIndexing(TIntermNode *root, TSymbolTable *symbolTable, int shaderVersion)
+void RemoveDynamicIndexing(TIntermNode *root,
+ TSymbolTable *symbolTable,
+ int shaderVersion,
+ PerformanceDiagnostics *perfDiagnostics)
{
- RemoveDynamicIndexingTraverser traverser(symbolTable, shaderVersion);
+ RemoveDynamicIndexingTraverser traverser(symbolTable, shaderVersion, perfDiagnostics);
do
{
traverser.nextIteration();
diff --git a/src/compiler/translator/RemoveDynamicIndexing.h b/src/compiler/translator/RemoveDynamicIndexing.h
index 9f85417..543cf56 100644
--- a/src/compiler/translator/RemoveDynamicIndexing.h
+++ b/src/compiler/translator/RemoveDynamicIndexing.h
@@ -15,8 +15,12 @@
class TIntermNode;
class TSymbolTable;
+class PerformanceDiagnostics;
-void RemoveDynamicIndexing(TIntermNode *root, TSymbolTable *symbolTable, int shaderVersion);
+void RemoveDynamicIndexing(TIntermNode *root,
+ TSymbolTable *symbolTable,
+ int shaderVersion,
+ PerformanceDiagnostics *perfDiagnostics);
} // namespace sh
diff --git a/src/compiler/translator/RemoveSwitchFallThrough.cpp b/src/compiler/translator/RemoveSwitchFallThrough.cpp
index 10da1df..ab2045d 100644
--- a/src/compiler/translator/RemoveSwitchFallThrough.cpp
+++ b/src/compiler/translator/RemoveSwitchFallThrough.cpp
@@ -10,6 +10,7 @@
#include "compiler/translator/RemoveSwitchFallThrough.h"
+#include "compiler/translator/Diagnostics.h"
#include "compiler/translator/IntermTraverse.h"
namespace sh
@@ -21,10 +22,12 @@
class RemoveSwitchFallThroughTraverser : public TIntermTraverser
{
public:
- static TIntermBlock *removeFallThrough(TIntermBlock *statementList);
+ static TIntermBlock *removeFallThrough(TIntermBlock *statementList,
+ PerformanceDiagnostics *perfDiagnostics);
private:
- RemoveSwitchFallThroughTraverser(TIntermBlock *statementList);
+ RemoveSwitchFallThroughTraverser(TIntermBlock *statementList,
+ PerformanceDiagnostics *perfDiagnostics);
void visitSymbol(TIntermSymbol *node) override;
void visitConstantUnion(TIntermConstantUnion *node) override;
@@ -49,11 +52,14 @@
bool mLastStatementWasBreak;
TIntermBlock *mPreviousCase;
std::vector<TIntermBlock *> mCasesSharingBreak;
+ PerformanceDiagnostics *mPerfDiagnostics;
};
-TIntermBlock *RemoveSwitchFallThroughTraverser::removeFallThrough(TIntermBlock *statementList)
+TIntermBlock *RemoveSwitchFallThroughTraverser::removeFallThrough(
+ TIntermBlock *statementList,
+ PerformanceDiagnostics *perfDiagnostics)
{
- RemoveSwitchFallThroughTraverser rm(statementList);
+ RemoveSwitchFallThroughTraverser rm(statementList, perfDiagnostics);
ASSERT(statementList);
statementList->traverse(&rm);
ASSERT(rm.mPreviousCase || statementList->getSequence()->empty());
@@ -69,11 +75,14 @@
return rm.mStatementListOut;
}
-RemoveSwitchFallThroughTraverser::RemoveSwitchFallThroughTraverser(TIntermBlock *statementList)
+RemoveSwitchFallThroughTraverser::RemoveSwitchFallThroughTraverser(
+ TIntermBlock *statementList,
+ PerformanceDiagnostics *perfDiagnostics)
: TIntermTraverser(true, false, false),
mStatementList(statementList),
mLastStatementWasBreak(false),
- mPreviousCase(nullptr)
+ mPreviousCase(nullptr),
+ mPerfDiagnostics(perfDiagnostics)
{
mStatementListOut = new TIntermBlock();
}
@@ -158,14 +167,10 @@
mCasesSharingBreak.push_back(mPreviousCase);
if (mLastStatementWasBreak)
{
- bool labelsWithNoStatements = true;
for (size_t i = 0; i < mCasesSharingBreak.size(); ++i)
{
- if (mCasesSharingBreak.at(i)->getSequence()->size() > 1)
- {
- labelsWithNoStatements = false;
- }
- if (labelsWithNoStatements)
+ ASSERT(!mCasesSharingBreak.at(i)->getSequence()->empty());
+ if (mCasesSharingBreak.at(i)->getSequence()->size() == 1)
{
// Fall-through is allowed in case the label has no statements.
outputSequence(mCasesSharingBreak.at(i)->getSequence(), 0);
@@ -173,6 +178,13 @@
else
{
// Include all the statements that this case can fall through under the same label.
+ if (mCasesSharingBreak.size() > i + 1u)
+ {
+ mPerfDiagnostics->warning(mCasesSharingBreak.at(i)->getLine(),
+ "Performance: non-empty fall-through cases in "
+ "switch statements generate extra code.",
+ "switch");
+ }
for (size_t j = i; j < mCasesSharingBreak.size(); ++j)
{
size_t startIndex =
@@ -192,6 +204,7 @@
handlePreviousCase();
mPreviousCase = new TIntermBlock();
mPreviousCase->getSequence()->push_back(node);
+ mPreviousCase->setLine(node->getLine());
// Don't traverse the condition of the case statement
return false;
}
@@ -248,9 +261,10 @@
} // anonymous namespace
-TIntermBlock *RemoveSwitchFallThrough(TIntermBlock *statementList)
+TIntermBlock *RemoveSwitchFallThrough(TIntermBlock *statementList,
+ PerformanceDiagnostics *perfDiagnostics)
{
- return RemoveSwitchFallThroughTraverser::removeFallThrough(statementList);
+ return RemoveSwitchFallThroughTraverser::removeFallThrough(statementList, perfDiagnostics);
}
} // namespace sh
diff --git a/src/compiler/translator/RemoveSwitchFallThrough.h b/src/compiler/translator/RemoveSwitchFallThrough.h
index 694b9f0..7a3b196 100644
--- a/src/compiler/translator/RemoveSwitchFallThrough.h
+++ b/src/compiler/translator/RemoveSwitchFallThrough.h
@@ -15,10 +15,12 @@
{
class TIntermBlock;
+class PerformanceDiagnostics;
// When given a statementList from a switch AST node, return an updated
// statementList that has fall-through removed.
-TIntermBlock *RemoveSwitchFallThrough(TIntermBlock *statementList);
+TIntermBlock *RemoveSwitchFallThrough(TIntermBlock *statementList,
+ PerformanceDiagnostics *perfDiagnostics);
} // namespace sh
diff --git a/src/compiler/translator/TranslatorESSL.cpp b/src/compiler/translator/TranslatorESSL.cpp
index 39d0f16..ff4b098 100644
--- a/src/compiler/translator/TranslatorESSL.cpp
+++ b/src/compiler/translator/TranslatorESSL.cpp
@@ -30,7 +30,9 @@
}
}
-void TranslatorESSL::translate(TIntermBlock *root, ShCompileOptions compileOptions)
+void TranslatorESSL::translate(TIntermBlock *root,
+ ShCompileOptions compileOptions,
+ PerformanceDiagnostics * /*perfDiagnostics*/)
{
// The ESSL output doesn't define a default precision for float, so float literal statements
// end up with no precision which is invalid ESSL.
diff --git a/src/compiler/translator/TranslatorESSL.h b/src/compiler/translator/TranslatorESSL.h
index 7e5a897..24dc738 100644
--- a/src/compiler/translator/TranslatorESSL.h
+++ b/src/compiler/translator/TranslatorESSL.h
@@ -21,7 +21,9 @@
void initBuiltInFunctionEmulator(BuiltInFunctionEmulator *emu,
ShCompileOptions compileOptions) override;
- void translate(TIntermBlock *root, ShCompileOptions compileOptions) override;
+ void translate(TIntermBlock *root,
+ ShCompileOptions compileOptions,
+ PerformanceDiagnostics *perfDiagnostics) override;
bool shouldFlattenPragmaStdglInvariantAll() override;
private:
diff --git a/src/compiler/translator/TranslatorGLSL.cpp b/src/compiler/translator/TranslatorGLSL.cpp
index 8a735d8..a14e69e 100644
--- a/src/compiler/translator/TranslatorGLSL.cpp
+++ b/src/compiler/translator/TranslatorGLSL.cpp
@@ -45,7 +45,9 @@
InitBuiltInFunctionEmulatorForGLSLMissingFunctions(emu, getShaderType(), targetGLSLVersion);
}
-void TranslatorGLSL::translate(TIntermBlock *root, ShCompileOptions compileOptions)
+void TranslatorGLSL::translate(TIntermBlock *root,
+ ShCompileOptions compileOptions,
+ PerformanceDiagnostics * /*perfDiagnostics*/)
{
TInfoSinkBase &sink = getInfoSink().obj;
diff --git a/src/compiler/translator/TranslatorGLSL.h b/src/compiler/translator/TranslatorGLSL.h
index 335d543..982d0e5 100644
--- a/src/compiler/translator/TranslatorGLSL.h
+++ b/src/compiler/translator/TranslatorGLSL.h
@@ -21,7 +21,9 @@
void initBuiltInFunctionEmulator(BuiltInFunctionEmulator *emu,
ShCompileOptions compileOptions) override;
- void translate(TIntermBlock *root, ShCompileOptions compileOptions) override;
+ void translate(TIntermBlock *root,
+ ShCompileOptions compileOptions,
+ PerformanceDiagnostics *perfDiagnostics) override;
bool shouldFlattenPragmaStdglInvariantAll() override;
bool shouldCollectVariables(ShCompileOptions compileOptions) override;
diff --git a/src/compiler/translator/TranslatorHLSL.cpp b/src/compiler/translator/TranslatorHLSL.cpp
index ddf25be..091a649 100644
--- a/src/compiler/translator/TranslatorHLSL.cpp
+++ b/src/compiler/translator/TranslatorHLSL.cpp
@@ -34,7 +34,9 @@
{
}
-void TranslatorHLSL::translate(TIntermBlock *root, ShCompileOptions compileOptions)
+void TranslatorHLSL::translate(TIntermBlock *root,
+ ShCompileOptions compileOptions,
+ PerformanceDiagnostics *perfDiagnostics)
{
const ShBuiltInResources &resources = getResources();
int numRenderTargets = resources.EXT_draw_buffers ? resources.MaxDrawBuffers : 1;
@@ -71,7 +73,7 @@
if (!shouldRunLoopAndIndexingValidation(compileOptions))
{
// HLSL doesn't support dynamic indexing of vectors and matrices.
- RemoveDynamicIndexing(root, &getSymbolTable(), getShaderVersion());
+ RemoveDynamicIndexing(root, &getSymbolTable(), getShaderVersion(), perfDiagnostics);
}
// Work around D3D9 bug that would manifest in vertex shaders with selection blocks which
@@ -126,7 +128,7 @@
sh::OutputHLSL outputHLSL(getShaderType(), getShaderVersion(), getExtensionBehavior(),
getSourcePath(), getOutputType(), numRenderTargets, getUniforms(),
- compileOptions, &getSymbolTable());
+ compileOptions, &getSymbolTable(), perfDiagnostics);
outputHLSL.output(root, getInfoSink().obj);
diff --git a/src/compiler/translator/TranslatorHLSL.h b/src/compiler/translator/TranslatorHLSL.h
index ca8241f..a55d906 100644
--- a/src/compiler/translator/TranslatorHLSL.h
+++ b/src/compiler/translator/TranslatorHLSL.h
@@ -24,7 +24,9 @@
const std::map<std::string, unsigned int> *getUniformRegisterMap() const;
protected:
- void translate(TIntermBlock *root, ShCompileOptions compileOptions) override;
+ void translate(TIntermBlock *root,
+ ShCompileOptions compileOptions,
+ PerformanceDiagnostics *perfDiagnostics) override;
bool shouldFlattenPragmaStdglInvariantAll() override;
// collectVariables needs to be run always so registers can be assigned.
diff --git a/src/compiler/translator/TranslatorVulkan.cpp b/src/compiler/translator/TranslatorVulkan.cpp
index 2e42b79..f78f46e 100644
--- a/src/compiler/translator/TranslatorVulkan.cpp
+++ b/src/compiler/translator/TranslatorVulkan.cpp
@@ -94,7 +94,9 @@
{
}
-void TranslatorVulkan::translate(TIntermBlock *root, ShCompileOptions compileOptions)
+void TranslatorVulkan::translate(TIntermBlock *root,
+ ShCompileOptions compileOptions,
+ PerformanceDiagnostics * /*perfDiagnostics*/)
{
TInfoSinkBase &sink = getInfoSink().obj;
diff --git a/src/compiler/translator/TranslatorVulkan.h b/src/compiler/translator/TranslatorVulkan.h
index f7a7cd9..ef67b15 100644
--- a/src/compiler/translator/TranslatorVulkan.h
+++ b/src/compiler/translator/TranslatorVulkan.h
@@ -23,7 +23,9 @@
TranslatorVulkan(sh::GLenum type, ShShaderSpec spec);
protected:
- void translate(TIntermBlock *root, ShCompileOptions compileOptions) override;
+ void translate(TIntermBlock *root,
+ ShCompileOptions compileOptions,
+ PerformanceDiagnostics *perfDiagnostics) override;
bool shouldFlattenPragmaStdglInvariantAll() override;
};