Use the AST analyses to narrow the usage of [[loop]] and [[flatten]]

These attributes are now used exactly in the loops and ifs
that require them, limiting the number of failed compilations
due to excessive unrolling and flattening.
Also output Lod0 functions only when needed.

Adds unit tests for LOOP, FLATTEN and Lod0 generation.

The patch was tested against the WebGL CTS 1.0.4 for which all the
failures existed prior to this patch and seem to be unrelated to this
change. It also works correctly on the following sites that had trouble
with [[loop]] and [[flatten]]:
 * dev.miaumiau.cat/rayTracer "Skull Demo"
 * The turbulenz engine particle demo
 * Lots of ShaderToy samples (including "Volcanic" and "Metropolis")
 * Google Maps Earth mode
 * Lots of Chrome Experiments
 * Lagoa
 * madebyevan.com/webgl-water
 * SketchFab
 * Unit Tests

BUG=angleproject:937
BUG=395048

Change-Id: I856de9025f10b79781929ec212dbffc2064a940e
Reviewed-on: https://chromium-review.googlesource.com/264791
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Tested-by: Corentin Wallez <cwallez@chromium.org>
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index 80782e2..f63a47a 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -14,7 +14,6 @@
 #include "common/utilities.h"
 #include "compiler/translator/BuiltInFunctionEmulator.h"
 #include "compiler/translator/BuiltInFunctionEmulatorHLSL.h"
-#include "compiler/translator/DetectDiscontinuity.h"
 #include "compiler/translator/FlagStd140Structs.h"
 #include "compiler/translator/InfoSink.h"
 #include "compiler/translator/NodeSearch.h"
@@ -109,7 +108,8 @@
       mSourcePath(sourcePath),
       mOutputType(outputType),
       mNumRenderTargets(numRenderTargets),
-      mCompileOptions(compileOptions)
+      mCompileOptions(compileOptions),
+      mCurrentFunctionMetadata(nullptr)
 {
     mUnfoldShortCircuit = new UnfoldShortCircuit(this);
     mInsideFunction = false;
@@ -130,8 +130,6 @@
 
     mUniqueIndex = 0;
 
-    mContainsLoopDiscontinuity = false;
-    mContainsAnyLoop = false;
     mOutputLod0Function = false;
     mInsideDiscontinuousLoop = false;
     mNestedLoopDepth = 0;
@@ -170,9 +168,6 @@
 
 void OutputHLSL::output(TIntermNode *treeRoot, TInfoSinkBase &objSink)
 {
-    mContainsLoopDiscontinuity = mShaderType == GL_FRAGMENT_SHADER && containsLoopDiscontinuity(treeRoot);
-    mContainsAnyLoop = containsAnyLoop(treeRoot);
-
     const std::vector<TIntermTyped*> &flaggedStructs = FlagStd140ValueStructs(treeRoot);
     makeFlaggedStructMaps(flaggedStructs);
 
@@ -188,12 +183,9 @@
     builtInFunctionEmulator.MarkBuiltInFunctionsForEmulation(treeRoot);
 
     // Now that we are done changing the AST, do the analyses need for HLSL generation
-    {
-        CallDAG dag;
-        CallDAG::InitResult success = dag.init(treeRoot, &objSink);
-        ASSERT(success == CallDAG::INITDAG_SUCCESS);
-        mASTAnalyses = CreateASTMetadataHLSL(treeRoot, dag);
-    }
+    CallDAG::InitResult success = mCallDag.init(treeRoot, &objSink);
+    ASSERT(success == CallDAG::INITDAG_SUCCESS);
+    mASTMetadataList = CreateASTMetadataHLSL(treeRoot, mCallDag);
 
     // Output the body and footer first to determine what has to go in the header
     mInfoSinkStack.push(&mBody);
@@ -1963,6 +1955,13 @@
       case EOpPrototype:
         if (visit == PreVisit)
         {
+            size_t index = mCallDag.findIndex(node);
+            // Skip the prototype if it is not implemented (and thus not used)
+            if (index == CallDAG::InvalidIndex)
+            {
+                return false;
+            }
+
             out << TypeString(node->getType()) << " " << Decorate(TFunction::unmangleName(node->getName())) << (mOutputLod0Function ? "Lod0(" : "(");
 
             TIntermSequence *arguments = node->getSequence();
@@ -1986,7 +1985,8 @@
             out << ");\n";
 
             // Also prototype the Lod0 variant if needed
-            if (mContainsLoopDiscontinuity && !mOutputLod0Function)
+            bool needsLod0 = mASTMetadataList[index].mNeedsLod0;
+            if (needsLod0 && !mOutputLod0Function && mShaderType == GL_FRAGMENT_SHADER)
             {
                 mOutputLod0Function = true;
                 node->traverse(this);
@@ -1999,8 +1999,13 @@
       case EOpComma:            outputTriplet(visit, "(", ", ", ")");                break;
       case EOpFunction:
         {
+            ASSERT(mCurrentFunctionMetadata == nullptr);
             TString name = TFunction::unmangleName(node->getName());
 
+            size_t index = mCallDag.findIndex(node);
+            ASSERT(index != CallDAG::InvalidIndex);
+            mCurrentFunctionMetadata = &mASTMetadataList[index];
+
             out << TypeString(node->getType()) << " ";
 
             if (name == "main")
@@ -2050,14 +2055,15 @@
 
             out << "}\n";
 
-            if (mContainsLoopDiscontinuity && !mOutputLod0Function)
+            mCurrentFunctionMetadata = nullptr;
+
+            bool needsLod0 = mASTMetadataList[index].mNeedsLod0;
+            if (needsLod0 && !mOutputLod0Function && mShaderType == GL_FRAGMENT_SHADER)
             {
-                if (name != "main")
-                {
-                    mOutputLod0Function = true;
-                    node->traverse(this);
-                    mOutputLod0Function = false;
-                }
+                ASSERT(name != "main");
+                mOutputLod0Function = true;
+                node->traverse(this);
+                mOutputLod0Function = false;
             }
 
             return false;
@@ -2066,11 +2072,15 @@
       case EOpFunctionCall:
         {
             TString name = TFunction::unmangleName(node->getName());
-            bool lod0 = mInsideDiscontinuousLoop || mOutputLod0Function;
             TIntermSequence *arguments = node->getSequence();
 
+            bool lod0 = mInsideDiscontinuousLoop || mOutputLod0Function;
             if (node->isUserDefined())
             {
+                size_t index = mCallDag.findIndex(node);
+                ASSERT(index != CallDAG::InvalidIndex);
+                lod0 &= mASTMetadataList[index].mNeedsLod0;
+
                 out << Decorate(name) << (lod0 ? "Lod0(" : "(");
             }
             else
@@ -2301,11 +2311,10 @@
     {
         mUnfoldShortCircuit->traverse(node->getCondition());
 
-        // D3D errors when there is a gradient operation in a loop in an unflattened if
-        // however flattening all the ifs in branch heavy shaders made D3D error too.
-        // As a temporary workaround we flatten the ifs only if there is at least a loop
-        // present somewhere in the shader.
-        if (mShaderType == GL_FRAGMENT_SHADER && mContainsAnyLoop)
+        // D3D errors when there is a gradient operation in a loop in an unflattened if.
+        if (mShaderType == GL_FRAGMENT_SHADER
+            && mCurrentFunctionMetadata->hasDiscontinuousLoop(node)
+            && mCurrentFunctionMetadata->hasGradientInCallGraph(node))
         {
             out << "FLATTEN ";
         }
@@ -2400,11 +2409,8 @@
     mNestedLoopDepth++;
 
     bool wasDiscontinuous = mInsideDiscontinuousLoop;
-
-    if (mContainsLoopDiscontinuity && !mInsideDiscontinuousLoop)
-    {
-        mInsideDiscontinuousLoop = containsLoopDiscontinuity(node);
-    }
+    mInsideDiscontinuousLoop = mInsideDiscontinuousLoop ||
+    mCurrentFunctionMetadata->mDiscontinuousLoops.count(node) > 0;
 
     if (mOutputType == SH_HLSL9_OUTPUT)
     {
@@ -2419,16 +2425,17 @@
 
     TInfoSinkBase &out = getInfoSink();
 
+    const char *unroll = mCurrentFunctionMetadata->hasGradientInCallGraph(node) ? "LOOP" : "";
     if (node->getType() == ELoopDoWhile)
     {
-        out << "{LOOP do\n";
+        out << "{" << unroll << " do\n";
 
         outputLineDirective(node->getLine().first_line);
         out << "{\n";
     }
     else
     {
-        out << "{LOOP for(";
+        out << "{" << unroll << " for(";
 
         if (node->getInit())
         {
@@ -2734,8 +2741,9 @@
                 }
 
                 // for(int index = initial; index < clampedLimit; index += increment)
+                const char *unroll = mCurrentFunctionMetadata->hasGradientInCallGraph(node) ? "LOOP" : "";
 
-                out << "LOOP for(";
+                out << unroll << " for(";
                 index->traverse(this);
                 out << " = ";
                 out << initial;