EmulatePrecision: Round constructor args only when needed

Constructor arguments don't need to be rounded if the constructor
result will be rounded to the same precision.

This makes precision-emulated shaders slightly faster to execute
in some cases.

BUG=angleproject:874
TEST=angle_unittests

Change-Id: I4220cd2289c97dcf5b7a25a4cbdd18487947c2d2
Reviewed-on: https://chromium-review.googlesource.com/500288
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Commit-Queue: Olli Etuaho <oetuaho@nvidia.com>
diff --git a/src/compiler/translator/EmulatePrecision.cpp b/src/compiler/translator/EmulatePrecision.cpp
index 4b08309..aebcce6 100644
--- a/src/compiler/translator/EmulatePrecision.cpp
+++ b/src/compiler/translator/EmulatePrecision.cpp
@@ -467,7 +467,7 @@
     return createInternalFunctionCallNode(TType(EbtVoid), functionName, arguments);
 }
 
-bool parentUsesResult(TIntermNode *parent, TIntermNode *node)
+bool ParentUsesResult(TIntermNode *parent, TIntermTyped *node)
 {
     if (!parent)
     {
@@ -490,6 +490,24 @@
     return true;
 }
 
+bool ParentConstructorTakesCareOfRounding(TIntermNode *parent, TIntermTyped *node)
+{  // True when |parent| is a constructor whose own rounding makes rounding |node| redundant.
+    if (!parent)  // No parent, so there is no constructor to rely on.
+    {
+        return false;
+    }
+    TIntermAggregate *parentConstructor = parent->getAsAggregate();
+    if (!parentConstructor || parentConstructor->getOp() != EOpConstruct)  // Not a constructor.
+    {
+        return false;
+    }
+    if (parentConstructor->getPrecision() != node->getPrecision())  // Precisions must match.
+    {
+        return false;
+    }
+    return canRoundFloat(parentConstructor->getType());  // Constructor result must be roundable.
+}
+
 }  // namespace anonymous
 
 EmulatePrecision::EmulatePrecision(const TSymbolTable &symbolTable, int shaderVersion)
@@ -500,7 +518,10 @@
 
 void EmulatePrecision::visitSymbol(TIntermSymbol *node)
 {
-    if (canRoundFloat(node->getType()) && !mDeclaringVariables && !isLValueRequiredHere())
+    TIntermNode *parent = getParentNode();
+    if (canRoundFloat(node->getType()) && ParentUsesResult(parent, node) &&
+        !ParentConstructorTakesCareOfRounding(parent, node) && !mDeclaringVariables &&
+        !isLValueRequiredHere())
     {
         TIntermNode *replacement = createRoundingFunctionCallNode(node);
         queueReplacement(node, replacement, OriginalNode::BECOMES_CHILD);
@@ -545,7 +566,8 @@
             case EOpMatrixTimesMatrix:
             {
                 TIntermNode *parent = getParentNode();
-                if (!parentUsesResult(parent, node))
+                if (!ParentUsesResult(parent, node) ||
+                    ParentConstructorTakesCareOfRounding(parent, node))
                 {
                     break;
                 }
@@ -637,7 +659,8 @@
 
 bool EmulatePrecision::visitAggregate(Visit visit, TIntermAggregate *node)
 {
-    bool visitChildren = true;
+    if (visit != PreVisit)
+        return true;
     switch (node->getOp())
     {
         case EOpCallInternalRawFunction:
@@ -652,15 +675,15 @@
             }
         default:
             TIntermNode *parent = getParentNode();
-            if (canRoundFloat(node->getType()) && visit == PreVisit &&
-                parentUsesResult(parent, node))
+            if (canRoundFloat(node->getType()) && ParentUsesResult(parent, node) &&
+                !ParentConstructorTakesCareOfRounding(parent, node))
             {
                 TIntermNode *replacement = createRoundingFunctionCallNode(node);
                 queueReplacement(node, replacement, OriginalNode::BECOMES_CHILD);
             }
             break;
     }
-    return visitChildren;
+    return true;
 }
 
 bool EmulatePrecision::visitUnary(Visit visit, TIntermUnary *node)