Implement simple ESSL3 matrix functions

Add support for built-in functions outerProduct, determinant, transpose
and the variations of matrixCompMult that were previously unsupported.

BUG=angle:859

Change-Id: Ie2b9cf83cd80c5a886c6d4eb190c7ce25a32d0a4
Reviewed-on: https://chromium-review.googlesource.com/239873
Reviewed-by: Nicolas Capens <capn@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Olli Etuaho <oetuaho@nvidia.com>
Tested-by: Olli Etuaho <oetuaho@nvidia.com>
diff --git a/src/compiler/translator/BuiltInFunctionEmulator.cpp b/src/compiler/translator/BuiltInFunctionEmulator.cpp
index 37958b3..d0b1908c 100644
--- a/src/compiler/translator/BuiltInFunctionEmulator.cpp
+++ b/src/compiler/translator/BuiltInFunctionEmulator.cpp
@@ -56,6 +56,7 @@
                 case EOpFaceForward:
                 case EOpReflect:
                 case EOpRefract:
+                case EOpOuterProduct:
                 case EOpMul:
                     break;
                 default:
diff --git a/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp b/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp
index 4de954c..0d4c62e 100644
--- a/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp
+++ b/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp
@@ -193,5 +193,52 @@
         "float4 webgl_atanh_emu(in float4 x) {\n"
         "    return 0.5 * log((1.0 + x) / (1.0 - x));\n"
         "}\n");
+
+    // The matrix resulting from the outer product needs to be transposed
+    // (matrices are stored transposed to simplify element access in HLSL).
+    // So the function should return transpose(c * r) where c is a column vector
+    // and r is a row vector. This can be simplified by using the following
+    // formula:
+    //   transpose(c * r) = transpose(r) * transpose(c)
+    // transpose(r) and transpose(c) are in a sense free, since to get the
+    // transpose of r, we can simply build a column matrix out of the original
+    // vector instead of a row matrix.
+    AddEmulatedFunction(EOpOuterProduct, float2, float2,
+        "float2x2 webgl_outerProduct_emu(in float2 c, in float2 r) {\n"
+        "    return mul(float2x1(r), float1x2(c));\n"
+        "}\n");
+    AddEmulatedFunction(EOpOuterProduct, float3, float3,
+        "float3x3 webgl_outerProduct_emu(in float3 c, in float3 r) {\n"
+        "    return mul(float3x1(r), float1x3(c));\n"
+        "}\n");
+    AddEmulatedFunction(EOpOuterProduct, float4, float4,
+        "float4x4 webgl_outerProduct_emu(in float4 c, in float4 r) {\n"
+        "    return mul(float4x1(r), float1x4(c));\n"
+        "}\n");
+
+    AddEmulatedFunction(EOpOuterProduct, float3, float2,
+        "float2x3 webgl_outerProduct_emu(in float3 c, in float2 r) {\n"
+        "    return mul(float2x1(r), float1x3(c));\n"
+        "}\n");
+    AddEmulatedFunction(EOpOuterProduct, float2, float3,
+        "float3x2 webgl_outerProduct_emu(in float2 c, in float3 r) {\n"
+        "    return mul(float3x1(r), float1x2(c));\n"
+        "}\n");
+    AddEmulatedFunction(EOpOuterProduct, float4, float2,
+        "float2x4 webgl_outerProduct_emu(in float4 c, in float2 r) {\n"
+        "    return mul(float2x1(r), float1x4(c));\n"
+        "}\n");
+    AddEmulatedFunction(EOpOuterProduct, float2, float4,
+        "float4x2 webgl_outerProduct_emu(in float2 c, in float4 r) {\n"
+        "    return mul(float4x1(r), float1x2(c));\n"
+        "}\n");
+    AddEmulatedFunction(EOpOuterProduct, float4, float3,
+        "float3x4 webgl_outerProduct_emu(in float4 c, in float3 r) {\n"
+        "    return mul(float3x1(r), float1x4(c));\n"
+        "}\n");
+    AddEmulatedFunction(EOpOuterProduct, float3, float4,
+        "float4x3 webgl_outerProduct_emu(in float3 c, in float4 r) {\n"
+        "    return mul(float4x1(r), float1x3(c));\n"
+        "}\n");
 }
 
diff --git a/src/compiler/translator/Initialize.cpp b/src/compiler/translator/Initialize.cpp
index cd0ef81..aad374f 100644
--- a/src/compiler/translator/Initialize.cpp
+++ b/src/compiler/translator/Initialize.cpp
@@ -294,6 +294,12 @@
     TType *mat2 = new TType(EbtFloat, 2, 2);
     TType *mat3 = new TType(EbtFloat, 3, 3);
     TType *mat4 = new TType(EbtFloat, 4, 4);
+    TType *mat2x3 = new TType(EbtFloat, 2, 3);
+    TType *mat3x2 = new TType(EbtFloat, 3, 2);
+    TType *mat2x4 = new TType(EbtFloat, 2, 4);
+    TType *mat4x2 = new TType(EbtFloat, 4, 2);
+    TType *mat3x4 = new TType(EbtFloat, 3, 4);
+    TType *mat4x3 = new TType(EbtFloat, 4, 3);
 
     //
     // Matrix Functions.
@@ -301,6 +307,36 @@
     symbolTable.insertBuiltIn(COMMON_BUILTINS, mat2, "matrixCompMult", mat2, mat2);
     symbolTable.insertBuiltIn(COMMON_BUILTINS, mat3, "matrixCompMult", mat3, mat3);
     symbolTable.insertBuiltIn(COMMON_BUILTINS, mat4, "matrixCompMult", mat4, mat4);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2x3, "matrixCompMult", mat2x3, mat2x3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3x2, "matrixCompMult", mat3x2, mat3x2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2x4, "matrixCompMult", mat2x4, mat2x4);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4x2, "matrixCompMult", mat4x2, mat4x2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3x4, "matrixCompMult", mat3x4, mat3x4);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4x3, "matrixCompMult", mat4x3, mat4x3);
+
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2, "outerProduct", float2, float2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3, "outerProduct", float3, float3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4, "outerProduct", float4, float4);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2x3, "outerProduct", float3, float2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3x2, "outerProduct", float2, float3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2x4, "outerProduct", float4, float2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4x2, "outerProduct", float2, float4);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3x4, "outerProduct", float4, float3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4x3, "outerProduct", float3, float4);
+
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2, "transpose", mat2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3, "transpose", mat3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4, "transpose", mat4);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2x3, "transpose", mat3x2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3x2, "transpose", mat2x3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2x4, "transpose", mat4x2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4x2, "transpose", mat2x4);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3x4, "transpose", mat4x3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4x3, "transpose", mat3x4);
+
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, float1, "determinant", mat2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, float1, "determinant", mat3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, float1, "determinant", mat4);
 
     TType *bool1 = new TType(EbtBool);
     TType *bool2 = new TType(EbtBool, 2);
@@ -710,6 +746,7 @@
     // operations.
     //
     symbolTable.relateToOperator(COMMON_BUILTINS, "matrixCompMult",   EOpMul);
+    symbolTable.relateToOperator(ESSL3_BUILTINS,  "matrixCompMult",   EOpMul);
 
     symbolTable.relateToOperator(COMMON_BUILTINS, "equal",            EOpVectorEqual);
     symbolTable.relateToOperator(COMMON_BUILTINS, "notEqual",         EOpVectorNotEqual);
@@ -769,6 +806,10 @@
     symbolTable.relateToOperator(COMMON_BUILTINS, "reflect",      EOpReflect);
     symbolTable.relateToOperator(COMMON_BUILTINS, "refract",      EOpRefract);
 
+    symbolTable.relateToOperator(ESSL3_BUILTINS, "outerProduct",  EOpOuterProduct);
+    symbolTable.relateToOperator(ESSL3_BUILTINS, "transpose",     EOpTranspose);
+    symbolTable.relateToOperator(ESSL3_BUILTINS, "determinant",   EOpDeterminant);
+
     symbolTable.relateToOperator(COMMON_BUILTINS, "any",          EOpAny);
     symbolTable.relateToOperator(COMMON_BUILTINS, "all",          EOpAll);
     symbolTable.relateToOperator(COMMON_BUILTINS, "not",          EOpVectorLogicalNot);
diff --git a/src/compiler/translator/IntermNode.h b/src/compiler/translator/IntermNode.h
index fff4969..3b97872 100644
--- a/src/compiler/translator/IntermNode.h
+++ b/src/compiler/translator/IntermNode.h
@@ -150,6 +150,10 @@
 
     EOpMatrixTimesMatrix,
 
+    EOpOuterProduct,
+    EOpTranspose,
+    EOpDeterminant,
+
     EOpAny,
     EOpAll,
 
diff --git a/src/compiler/translator/OutputGLSLBase.cpp b/src/compiler/translator/OutputGLSLBase.cpp
index e5743d2..5553960 100644
--- a/src/compiler/translator/OutputGLSLBase.cpp
+++ b/src/compiler/translator/OutputGLSLBase.cpp
@@ -516,6 +516,13 @@
         preString = "fwidth(";
         break;
 
+      case EOpTranspose:
+        preString = "transpose(";
+        break;
+      case EOpDeterminant:
+        preString = "determinant(";
+        break;
+
       case EOpAny:
         preString = "any(";
         break;
@@ -768,6 +775,10 @@
         }
         break;
 
+      case EOpOuterProduct:
+        writeBuiltInFunctionTriplet(visit, "outerProduct(", useEmulatedFunction);
+        break;
+
       case EOpLessThan:
         writeBuiltInFunctionTriplet(visit, "lessThan(", useEmulatedFunction);
         break;
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index fe79221..56ceb20 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -1667,6 +1667,9 @@
             outputTriplet(visit, "fwidth(", "", ")");
         }
         break;
+      case EOpTranspose:        outputTriplet(visit, "transpose(", "", ")");   break;
+      case EOpDeterminant:      outputTriplet(visit, "determinant(transpose(", "", "))"); break;
+
       case EOpAny:              outputTriplet(visit, "any(", "", ")");       break;
       case EOpAll:              outputTriplet(visit, "all(", "", ")");       break;
       default: UNREACHABLE();
@@ -2103,6 +2106,10 @@
         break;
       case EOpReflect:       outputTriplet(visit, "reflect(", ", ", ")");       break;
       case EOpRefract:       outputTriplet(visit, "refract(", ", ", ")");       break;
+      case EOpOuterProduct:
+        ASSERT(node->getUseEmulatedFunction());
+        writeEmulatedFunctionTriplet(visit, "outerProduct(");
+        break;
       case EOpMul:           outputTriplet(visit, "(", " * ", ")");             break;
       default: UNREACHABLE();
     }
diff --git a/src/compiler/translator/intermOut.cpp b/src/compiler/translator/intermOut.cpp
index 19d0a75..0138a8f 100644
--- a/src/compiler/translator/intermOut.cpp
+++ b/src/compiler/translator/intermOut.cpp
@@ -308,6 +308,9 @@
       // case EOpDPdy:           out << "dPdy";                 break;
       // case EOpFwidth:         out << "fwidth";               break;
 
+      case EOpDeterminant:    out << "determinant";          break;
+      case EOpTranspose:      out << "transpose";            break;
+
       case EOpAny:            out << "any";                  break;
       case EOpAll:            out << "all";                  break;
 
@@ -393,6 +396,8 @@
       case EOpRefract:       out << "refract";                 break;
       case EOpMul:           out << "component-wise multiply"; break;
 
+      case EOpOuterProduct:  out << "outer product";   break;
+
       case EOpDeclaration:   out << "Declaration: ";   break;
       case EOpInvariantDeclaration: out << "Invariant Declaration: "; break;