Generate performance warnings in HLSL translation

Generate performance warnings for some code that undergoes heavy
emulation when translated to HLSL:
1. Dynamic indexing of vectors and matrices.
2. Non-empty fall-through cases in switch/case.

The warnings are generated only when code is translated to HLSL.
Generating them in the parsing stage would add too much maintenance
burden.

Improves switch statement fall-through handling in cases where an
empty fall-through case follows a non-empty one so that extra
performance warnings are not generated.

BUG=angleproject:1116

Change-Id: I7c85d78fe7c4f8e6042bda72ceaaf6e37dadfe6c
Reviewed-on: https://chromium-review.googlesource.com/732986
Commit-Queue: Olli Etuaho <oetuaho@nvidia.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
diff --git a/src/compiler/translator/Compiler.cpp b/src/compiler/translator/Compiler.cpp
index d89f3d1..b7af2d7 100644
--- a/src/compiler/translator/Compiler.cpp
+++ b/src/compiler/translator/Compiler.cpp
@@ -626,7 +626,10 @@
             OutputTree(root, infoSink.info);
 
         if (compileOptions & SH_OBJECT_CODE)
-            translate(root, compileOptions);
+        {
+            PerformanceDiagnostics perfDiagnostics(&mDiagnostics);
+            translate(root, compileOptions, &perfDiagnostics);
+        }
 
         // The IntermNode tree doesn't need to be deleted here, since the
         // memory will be freed in a big chunk by the PoolAllocator.
diff --git a/src/compiler/translator/Compiler.h b/src/compiler/translator/Compiler.h
index 085b7a9..7dd5ac0 100644
--- a/src/compiler/translator/Compiler.h
+++ b/src/compiler/translator/Compiler.h
@@ -156,8 +156,10 @@
     // Add emulated functions to the built-in function emulator.
     virtual void initBuiltInFunctionEmulator(BuiltInFunctionEmulator *emu,
                                              ShCompileOptions compileOptions){};
-    // Translate to object code.
-    virtual void translate(TIntermBlock *root, ShCompileOptions compileOptions) = 0;
+    // Translate to object code. May generate performance warnings through the diagnostics.
+    virtual void translate(TIntermBlock *root,
+                           ShCompileOptions compileOptions,
+                           PerformanceDiagnostics *perfDiagnostics) = 0;
     // Insert statements to reference all members in unused uniform blocks with standard and shared
     // layout. This is to work around a Mac driver that treats unused standard/shared
     // uniform blocks as inactive.
diff --git a/src/compiler/translator/Diagnostics.cpp b/src/compiler/translator/Diagnostics.cpp
index d2fe429..1744e5a 100644
--- a/src/compiler/translator/Diagnostics.cpp
+++ b/src/compiler/translator/Diagnostics.cpp
@@ -91,4 +91,15 @@
     mNumWarnings = 0;
 }
 
+PerformanceDiagnostics::PerformanceDiagnostics(TDiagnostics *diagnostics)
+    : mDiagnostics(diagnostics)
+{
+    ASSERT(diagnostics);
+}
+
+void PerformanceDiagnostics::warning(const TSourceLoc &loc, const char *reason, const char *token)
+{
+    mDiagnostics->warning(loc, reason, token);
+}
+
 }  // namespace sh
diff --git a/src/compiler/translator/Diagnostics.h b/src/compiler/translator/Diagnostics.h
index 78bfb75..55b8899 100644
--- a/src/compiler/translator/Diagnostics.h
+++ b/src/compiler/translator/Diagnostics.h
@@ -50,6 +50,18 @@
     int mNumWarnings;
 };
 
+// Diagnostics wrapper to use when the code is only allowed to generate warnings.
+class PerformanceDiagnostics : public angle::NonCopyable
+{
+  public:
+    PerformanceDiagnostics(TDiagnostics *diagnostics);
+
+    void warning(const TSourceLoc &loc, const char *reason, const char *token);
+
+  private:
+    TDiagnostics *mDiagnostics;
+};
+
 }  // namespace sh
 
 #endif  // COMPILER_TRANSLATOR_DIAGNOSTICS_H_
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index 54427a9..13a7016 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -121,7 +121,8 @@
                        int numRenderTargets,
                        const std::vector<Uniform> &uniforms,
                        ShCompileOptions compileOptions,
-                       TSymbolTable *symbolTable)
+                       TSymbolTable *symbolTable,
+                       PerformanceDiagnostics *perfDiagnostics)
     : TIntermTraverser(true, true, true, symbolTable),
       mShaderType(shaderType),
       mShaderVersion(shaderVersion),
@@ -130,7 +131,8 @@
       mOutputType(outputType),
       mCompileOptions(compileOptions),
       mNumRenderTargets(numRenderTargets),
-      mCurrentFunctionMetadata(nullptr)
+      mCurrentFunctionMetadata(nullptr),
+      mPerfDiagnostics(perfDiagnostics)
 {
     mInsideFunction = false;
 
@@ -2173,7 +2175,7 @@
     ASSERT(node->getStatementList());
     if (visit == PreVisit)
     {
-        node->setStatementList(RemoveSwitchFallThrough(node->getStatementList()));
+        node->setStatementList(RemoveSwitchFallThrough(node->getStatementList(), mPerfDiagnostics));
     }
     outputTriplet(out, visit, "switch (", ") ", "");
     // The curly braces get written when visiting the statementList block.
diff --git a/src/compiler/translator/OutputHLSL.h b/src/compiler/translator/OutputHLSL.h
index 5509842..4e8e427 100644
--- a/src/compiler/translator/OutputHLSL.h
+++ b/src/compiler/translator/OutputHLSL.h
@@ -40,7 +40,8 @@
                int numRenderTargets,
                const std::vector<Uniform> &uniforms,
                ShCompileOptions compileOptions,
-               TSymbolTable *symbolTable);
+               TSymbolTable *symbolTable,
+               PerformanceDiagnostics *perfDiagnostics);
 
     ~OutputHLSL();
 
@@ -241,6 +242,8 @@
     // arrays can't be return values in HLSL.
     std::vector<ArrayHelperFunction> mArrayConstructIntoFunctions;
 
+    PerformanceDiagnostics *mPerfDiagnostics;
+
   private:
     TString samplerNamePrefixFromStruct(TIntermTyped *node);
     bool ancestorEvaluatesToSamplerInStruct();
diff --git a/src/compiler/translator/RemoveDynamicIndexing.cpp b/src/compiler/translator/RemoveDynamicIndexing.cpp
index 5344a27..7766c1a 100644
--- a/src/compiler/translator/RemoveDynamicIndexing.cpp
+++ b/src/compiler/translator/RemoveDynamicIndexing.cpp
@@ -9,6 +9,7 @@
 
 #include "compiler/translator/RemoveDynamicIndexing.h"
 
+#include "compiler/translator/Diagnostics.h"
 #include "compiler/translator/InfoSink.h"
 #include "compiler/translator/IntermNodePatternMatcher.h"
 #include "compiler/translator/IntermNode_util.h"
@@ -281,7 +282,9 @@
 class RemoveDynamicIndexingTraverser : public TLValueTrackingTraverser
 {
   public:
-    RemoveDynamicIndexingTraverser(TSymbolTable *symbolTable, int shaderVersion);
+    RemoveDynamicIndexingTraverser(TSymbolTable *symbolTable,
+                                   int shaderVersion,
+                                   PerformanceDiagnostics *perfDiagnostics);
 
     bool visitBinary(Visit visit, TIntermBinary *node) override;
 
@@ -305,13 +308,18 @@
     //   V[j++][i]++.
     // where V is an array of vectors, j++ will only be evaluated once.
     bool mRemoveIndexSideEffectsInSubtree;
+
+    PerformanceDiagnostics *mPerfDiagnostics;
 };
 
-RemoveDynamicIndexingTraverser::RemoveDynamicIndexingTraverser(TSymbolTable *symbolTable,
-                                                               int shaderVersion)
+RemoveDynamicIndexingTraverser::RemoveDynamicIndexingTraverser(
+    TSymbolTable *symbolTable,
+    int shaderVersion,
+    PerformanceDiagnostics *perfDiagnostics)
     : TLValueTrackingTraverser(true, false, false, symbolTable, shaderVersion),
       mUsedTreeInsertion(false),
-      mRemoveIndexSideEffectsInSubtree(false)
+      mRemoveIndexSideEffectsInSubtree(false),
+      mPerfDiagnostics(perfDiagnostics)
 {
 }
 
@@ -398,6 +406,10 @@
         }
         else if (IntermNodePatternMatcher::IsDynamicIndexingOfVectorOrMatrix(node))
         {
+            mPerfDiagnostics->warning(node->getLine(),
+                                      "Performance: dynamic indexing of vectors and "
+                                      "matrices is emulated and can be slow.",
+                                      "[]");
             bool write = isLValueRequiredHere();
 
 #if defined(ANGLE_ENABLE_ASSERTS)
@@ -515,9 +527,12 @@
 
 }  // namespace
 
-void RemoveDynamicIndexing(TIntermNode *root, TSymbolTable *symbolTable, int shaderVersion)
+void RemoveDynamicIndexing(TIntermNode *root,
+                           TSymbolTable *symbolTable,
+                           int shaderVersion,
+                           PerformanceDiagnostics *perfDiagnostics)
 {
-    RemoveDynamicIndexingTraverser traverser(symbolTable, shaderVersion);
+    RemoveDynamicIndexingTraverser traverser(symbolTable, shaderVersion, perfDiagnostics);
     do
     {
         traverser.nextIteration();
diff --git a/src/compiler/translator/RemoveDynamicIndexing.h b/src/compiler/translator/RemoveDynamicIndexing.h
index 9f85417..543cf56 100644
--- a/src/compiler/translator/RemoveDynamicIndexing.h
+++ b/src/compiler/translator/RemoveDynamicIndexing.h
@@ -15,8 +15,12 @@
 
 class TIntermNode;
 class TSymbolTable;
+class PerformanceDiagnostics;
 
-void RemoveDynamicIndexing(TIntermNode *root, TSymbolTable *symbolTable, int shaderVersion);
+void RemoveDynamicIndexing(TIntermNode *root,
+                           TSymbolTable *symbolTable,
+                           int shaderVersion,
+                           PerformanceDiagnostics *perfDiagnostics);
 
 }  // namespace sh
 
diff --git a/src/compiler/translator/RemoveSwitchFallThrough.cpp b/src/compiler/translator/RemoveSwitchFallThrough.cpp
index 10da1df..ab2045d 100644
--- a/src/compiler/translator/RemoveSwitchFallThrough.cpp
+++ b/src/compiler/translator/RemoveSwitchFallThrough.cpp
@@ -10,6 +10,7 @@
 
 #include "compiler/translator/RemoveSwitchFallThrough.h"
 
+#include "compiler/translator/Diagnostics.h"
 #include "compiler/translator/IntermTraverse.h"
 
 namespace sh
@@ -21,10 +22,12 @@
 class RemoveSwitchFallThroughTraverser : public TIntermTraverser
 {
   public:
-    static TIntermBlock *removeFallThrough(TIntermBlock *statementList);
+    static TIntermBlock *removeFallThrough(TIntermBlock *statementList,
+                                           PerformanceDiagnostics *perfDiagnostics);
 
   private:
-    RemoveSwitchFallThroughTraverser(TIntermBlock *statementList);
+    RemoveSwitchFallThroughTraverser(TIntermBlock *statementList,
+                                     PerformanceDiagnostics *perfDiagnostics);
 
     void visitSymbol(TIntermSymbol *node) override;
     void visitConstantUnion(TIntermConstantUnion *node) override;
@@ -49,11 +52,14 @@
     bool mLastStatementWasBreak;
     TIntermBlock *mPreviousCase;
     std::vector<TIntermBlock *> mCasesSharingBreak;
+    PerformanceDiagnostics *mPerfDiagnostics;
 };
 
-TIntermBlock *RemoveSwitchFallThroughTraverser::removeFallThrough(TIntermBlock *statementList)
+TIntermBlock *RemoveSwitchFallThroughTraverser::removeFallThrough(
+    TIntermBlock *statementList,
+    PerformanceDiagnostics *perfDiagnostics)
 {
-    RemoveSwitchFallThroughTraverser rm(statementList);
+    RemoveSwitchFallThroughTraverser rm(statementList, perfDiagnostics);
     ASSERT(statementList);
     statementList->traverse(&rm);
     ASSERT(rm.mPreviousCase || statementList->getSequence()->empty());
@@ -69,11 +75,14 @@
     return rm.mStatementListOut;
 }
 
-RemoveSwitchFallThroughTraverser::RemoveSwitchFallThroughTraverser(TIntermBlock *statementList)
+RemoveSwitchFallThroughTraverser::RemoveSwitchFallThroughTraverser(
+    TIntermBlock *statementList,
+    PerformanceDiagnostics *perfDiagnostics)
     : TIntermTraverser(true, false, false),
       mStatementList(statementList),
       mLastStatementWasBreak(false),
-      mPreviousCase(nullptr)
+      mPreviousCase(nullptr),
+      mPerfDiagnostics(perfDiagnostics)
 {
     mStatementListOut = new TIntermBlock();
 }
@@ -158,14 +167,10 @@
         mCasesSharingBreak.push_back(mPreviousCase);
     if (mLastStatementWasBreak)
     {
-        bool labelsWithNoStatements = true;
         for (size_t i = 0; i < mCasesSharingBreak.size(); ++i)
         {
-            if (mCasesSharingBreak.at(i)->getSequence()->size() > 1)
-            {
-                labelsWithNoStatements = false;
-            }
-            if (labelsWithNoStatements)
+            ASSERT(!mCasesSharingBreak.at(i)->getSequence()->empty());
+            if (mCasesSharingBreak.at(i)->getSequence()->size() == 1)
             {
                 // Fall-through is allowed in case the label has no statements.
                 outputSequence(mCasesSharingBreak.at(i)->getSequence(), 0);
@@ -173,6 +178,13 @@
             else
             {
                 // Include all the statements that this case can fall through under the same label.
+                if (mCasesSharingBreak.size() > i + 1u)
+                {
+                    mPerfDiagnostics->warning(mCasesSharingBreak.at(i)->getLine(),
+                                              "Performance: non-empty fall-through cases in "
+                                              "switch statements generate extra code.",
+                                              "switch");
+                }
                 for (size_t j = i; j < mCasesSharingBreak.size(); ++j)
                 {
                     size_t startIndex =
@@ -192,6 +204,7 @@
     handlePreviousCase();
     mPreviousCase = new TIntermBlock();
     mPreviousCase->getSequence()->push_back(node);
+    mPreviousCase->setLine(node->getLine());
     // Don't traverse the condition of the case statement
     return false;
 }
@@ -248,9 +261,10 @@
 
 }  // anonymous namespace
 
-TIntermBlock *RemoveSwitchFallThrough(TIntermBlock *statementList)
+TIntermBlock *RemoveSwitchFallThrough(TIntermBlock *statementList,
+                                      PerformanceDiagnostics *perfDiagnostics)
 {
-    return RemoveSwitchFallThroughTraverser::removeFallThrough(statementList);
+    return RemoveSwitchFallThroughTraverser::removeFallThrough(statementList, perfDiagnostics);
 }
 
 }  // namespace sh
diff --git a/src/compiler/translator/RemoveSwitchFallThrough.h b/src/compiler/translator/RemoveSwitchFallThrough.h
index 694b9f0..7a3b196 100644
--- a/src/compiler/translator/RemoveSwitchFallThrough.h
+++ b/src/compiler/translator/RemoveSwitchFallThrough.h
@@ -15,10 +15,12 @@
 {
 
 class TIntermBlock;
+class PerformanceDiagnostics;
 
 // When given a statementList from a switch AST node, return an updated
 // statementList that has fall-through removed.
-TIntermBlock *RemoveSwitchFallThrough(TIntermBlock *statementList);
+TIntermBlock *RemoveSwitchFallThrough(TIntermBlock *statementList,
+                                      PerformanceDiagnostics *perfDiagnostics);
 
 }  // namespace sh
 
diff --git a/src/compiler/translator/TranslatorESSL.cpp b/src/compiler/translator/TranslatorESSL.cpp
index 39d0f16..ff4b098 100644
--- a/src/compiler/translator/TranslatorESSL.cpp
+++ b/src/compiler/translator/TranslatorESSL.cpp
@@ -30,7 +30,9 @@
     }
 }
 
-void TranslatorESSL::translate(TIntermBlock *root, ShCompileOptions compileOptions)
+void TranslatorESSL::translate(TIntermBlock *root,
+                               ShCompileOptions compileOptions,
+                               PerformanceDiagnostics * /*perfDiagnostics*/)
 {
     // The ESSL output doesn't define a default precision for float, so float literal statements
     // end up with no precision which is invalid ESSL.
diff --git a/src/compiler/translator/TranslatorESSL.h b/src/compiler/translator/TranslatorESSL.h
index 7e5a897..24dc738 100644
--- a/src/compiler/translator/TranslatorESSL.h
+++ b/src/compiler/translator/TranslatorESSL.h
@@ -21,7 +21,9 @@
     void initBuiltInFunctionEmulator(BuiltInFunctionEmulator *emu,
                                      ShCompileOptions compileOptions) override;
 
-    void translate(TIntermBlock *root, ShCompileOptions compileOptions) override;
+    void translate(TIntermBlock *root,
+                   ShCompileOptions compileOptions,
+                   PerformanceDiagnostics *perfDiagnostics) override;
     bool shouldFlattenPragmaStdglInvariantAll() override;
 
   private:
diff --git a/src/compiler/translator/TranslatorGLSL.cpp b/src/compiler/translator/TranslatorGLSL.cpp
index 8a735d8..a14e69e 100644
--- a/src/compiler/translator/TranslatorGLSL.cpp
+++ b/src/compiler/translator/TranslatorGLSL.cpp
@@ -45,7 +45,9 @@
     InitBuiltInFunctionEmulatorForGLSLMissingFunctions(emu, getShaderType(), targetGLSLVersion);
 }
 
-void TranslatorGLSL::translate(TIntermBlock *root, ShCompileOptions compileOptions)
+void TranslatorGLSL::translate(TIntermBlock *root,
+                               ShCompileOptions compileOptions,
+                               PerformanceDiagnostics * /*perfDiagnostics*/)
 {
     TInfoSinkBase &sink = getInfoSink().obj;
 
diff --git a/src/compiler/translator/TranslatorGLSL.h b/src/compiler/translator/TranslatorGLSL.h
index 335d543..982d0e5 100644
--- a/src/compiler/translator/TranslatorGLSL.h
+++ b/src/compiler/translator/TranslatorGLSL.h
@@ -21,7 +21,9 @@
     void initBuiltInFunctionEmulator(BuiltInFunctionEmulator *emu,
                                      ShCompileOptions compileOptions) override;
 
-    void translate(TIntermBlock *root, ShCompileOptions compileOptions) override;
+    void translate(TIntermBlock *root,
+                   ShCompileOptions compileOptions,
+                   PerformanceDiagnostics *perfDiagnostics) override;
     bool shouldFlattenPragmaStdglInvariantAll() override;
     bool shouldCollectVariables(ShCompileOptions compileOptions) override;
 
diff --git a/src/compiler/translator/TranslatorHLSL.cpp b/src/compiler/translator/TranslatorHLSL.cpp
index ddf25be..091a649 100644
--- a/src/compiler/translator/TranslatorHLSL.cpp
+++ b/src/compiler/translator/TranslatorHLSL.cpp
@@ -34,7 +34,9 @@
 {
 }
 
-void TranslatorHLSL::translate(TIntermBlock *root, ShCompileOptions compileOptions)
+void TranslatorHLSL::translate(TIntermBlock *root,
+                               ShCompileOptions compileOptions,
+                               PerformanceDiagnostics *perfDiagnostics)
 {
     const ShBuiltInResources &resources = getResources();
     int numRenderTargets                = resources.EXT_draw_buffers ? resources.MaxDrawBuffers : 1;
@@ -71,7 +73,7 @@
     if (!shouldRunLoopAndIndexingValidation(compileOptions))
     {
         // HLSL doesn't support dynamic indexing of vectors and matrices.
-        RemoveDynamicIndexing(root, &getSymbolTable(), getShaderVersion());
+        RemoveDynamicIndexing(root, &getSymbolTable(), getShaderVersion(), perfDiagnostics);
     }
 
     // Work around D3D9 bug that would manifest in vertex shaders with selection blocks which
@@ -126,7 +128,7 @@
 
     sh::OutputHLSL outputHLSL(getShaderType(), getShaderVersion(), getExtensionBehavior(),
                               getSourcePath(), getOutputType(), numRenderTargets, getUniforms(),
-                              compileOptions, &getSymbolTable());
+                              compileOptions, &getSymbolTable(), perfDiagnostics);
 
     outputHLSL.output(root, getInfoSink().obj);
 
diff --git a/src/compiler/translator/TranslatorHLSL.h b/src/compiler/translator/TranslatorHLSL.h
index ca8241f..a55d906 100644
--- a/src/compiler/translator/TranslatorHLSL.h
+++ b/src/compiler/translator/TranslatorHLSL.h
@@ -24,7 +24,9 @@
     const std::map<std::string, unsigned int> *getUniformRegisterMap() const;
 
   protected:
-    void translate(TIntermBlock *root, ShCompileOptions compileOptions) override;
+    void translate(TIntermBlock *root,
+                   ShCompileOptions compileOptions,
+                   PerformanceDiagnostics *perfDiagnostics) override;
     bool shouldFlattenPragmaStdglInvariantAll() override;
 
     // collectVariables needs to be run always so registers can be assigned.
diff --git a/src/compiler/translator/TranslatorVulkan.cpp b/src/compiler/translator/TranslatorVulkan.cpp
index 2e42b79..f78f46e 100644
--- a/src/compiler/translator/TranslatorVulkan.cpp
+++ b/src/compiler/translator/TranslatorVulkan.cpp
@@ -94,7 +94,9 @@
 {
 }
 
-void TranslatorVulkan::translate(TIntermBlock *root, ShCompileOptions compileOptions)
+void TranslatorVulkan::translate(TIntermBlock *root,
+                                 ShCompileOptions compileOptions,
+                                 PerformanceDiagnostics * /*perfDiagnostics*/)
 {
     TInfoSinkBase &sink = getInfoSink().obj;
 
diff --git a/src/compiler/translator/TranslatorVulkan.h b/src/compiler/translator/TranslatorVulkan.h
index f7a7cd9..ef67b15 100644
--- a/src/compiler/translator/TranslatorVulkan.h
+++ b/src/compiler/translator/TranslatorVulkan.h
@@ -23,7 +23,9 @@
     TranslatorVulkan(sh::GLenum type, ShShaderSpec spec);
 
   protected:
-    void translate(TIntermBlock *root, ShCompileOptions compileOptions) override;
+    void translate(TIntermBlock *root,
+                   ShCompileOptions compileOptions,
+                   PerformanceDiagnostics *perfDiagnostics) override;
     bool shouldFlattenPragmaStdglInvariantAll() override;
 };