Implement matrix inverse built-in

Inverse is emulated in HLSL by calculating the cofactor matrix and
dividing it by the determinant. This results in the transpose of the
inverse, as is required. Better-performing options might exist,
especially for 4x4 matrices, but this is enough for a working
implementation.

BUG=angle:859

Change-Id: I5185797cc1ed86865f5f4342707abdc2977a186b
Reviewed-on: https://chromium-review.googlesource.com/240331
Tested-by: Olli Etuaho <oetuaho@nvidia.com>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
diff --git a/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp b/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp
index b39f257..8e3688f 100644
--- a/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp
+++ b/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp
@@ -149,7 +149,7 @@
         "}\n");
     AddEmulatedFunction(EOpAsinh, float2,
         "float2 webgl_asinh_emu(in float2 x) {\n"
-        "    return log(x + sqrt(pow(x, 2.0) + 1.0));"
+        "    return log(x + sqrt(pow(x, 2.0) + 1.0));\n"
         "}\n");
     AddEmulatedFunction(EOpAsinh, float3,
         "float3 webgl_asinh_emu(in float3 x) {\n"
@@ -240,6 +240,87 @@
         "float4x3 webgl_outerProduct_emu(in float3 c, in float4 r) {\n"
         "    return mul(float4x1(r), float1x3(c));\n"
         "}\n");
+
+    TType mat2(EbtFloat, 2, 2);
+    TType mat3(EbtFloat, 3, 3);
+    TType mat4(EbtFloat, 4, 4);
+
+    // Remember here that the parameter matrix is actually the transpose
+    // of the matrix that we're trying to invert, and the resulting matrix
+    // should also be the transpose of the inverse.
+
+    // When accessing the parameter matrix with m[a][b], a can be thought of as
+    // the column and b as the row of the matrix that we're inverting.
+
+    // We calculate the inverse as the adjugate matrix divided by the
+    // determinant of the matrix being inverted. However, as the result needs
+    // to be transposed, we actually use the transpose of the adjugate matrix
+    // which happens to be the cofactor matrix. That's stored in "cof".
+
+    // We don't need to care about divide-by-zero since results are undefined
+    // for singular or poorly-conditioned matrices.
+
+    AddEmulatedFunction(EOpInverse, mat2,
+        "float2x2 webgl_inverse_emu(in float2x2 m) {\n"
+        "    float2x2 cof = { m[1][1], -m[0][1], -m[1][0], m[0][0] };\n"
+        "    return cof / determinant(transpose(m));\n"
+        "}\n");
+
+    // cofAB is the cofactor for column A and row B.
+
+    AddEmulatedFunction(EOpInverse, mat3,
+        "float3x3 webgl_inverse_emu(in float3x3 m) {\n"
+        "    float cof00 = m[1][1] * m[2][2] - m[2][1] * m[1][2];\n"
+        "    float cof01 = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]);\n"
+        "    float cof02 = m[1][0] * m[2][1] - m[2][0] * m[1][1];\n"
+        "    float cof10 = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]);\n"
+        "    float cof11 = m[0][0] * m[2][2] - m[2][0] * m[0][2];\n"
+        "    float cof12 = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]);\n"
+        "    float cof20 = m[0][1] * m[1][2] - m[1][1] * m[0][2];\n"
+        "    float cof21 = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]);\n"
+        "    float cof22 = m[0][0] * m[1][1] - m[1][0] * m[0][1];\n"
+        "    float3x3 cof = { cof00, cof10, cof20, cof01, cof11, cof21, cof02, cof12, cof22 };\n"
+        "    return cof / determinant(transpose(m));\n"
+        "}\n");
+
+    AddEmulatedFunction(EOpInverse, mat4,
+        "float4x4 webgl_inverse_emu(in float4x4 m) {\n"
+        "    float cof00 = m[1][1] * m[2][2] * m[3][3] + m[2][1] * m[3][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3]"
+                       " - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] - m[3][1] * m[2][2] * m[1][3];\n"
+        "    float cof01 = -(m[1][0] * m[2][2] * m[3][3] + m[2][0] * m[3][2] * m[1][3] + m[3][0] * m[1][2] * m[2][3]"
+                       " - m[1][0] * m[3][2] * m[2][3] - m[2][0] * m[1][2] * m[3][3] - m[3][0] * m[2][2] * m[1][3]);\n"
+        "    float cof02 = m[1][0] * m[2][1] * m[3][3] + m[2][0] * m[3][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3]"
+                       " - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] - m[3][0] * m[2][1] * m[1][3];\n"
+        "    float cof03 = -(m[1][0] * m[2][1] * m[3][2] + m[2][0] * m[3][1] * m[1][2] + m[3][0] * m[1][1] * m[2][2]"
+                       " - m[1][0] * m[3][1] * m[2][2] - m[2][0] * m[1][1] * m[3][2] - m[3][0] * m[2][1] * m[1][2]);\n"
+        "    float cof10 = -(m[0][1] * m[2][2] * m[3][3] + m[2][1] * m[3][2] * m[0][3] + m[3][1] * m[0][2] * m[2][3]"
+                       " - m[0][1] * m[3][2] * m[2][3] - m[2][1] * m[0][2] * m[3][3] - m[3][1] * m[2][2] * m[0][3]);\n"
+        "    float cof11 = m[0][0] * m[2][2] * m[3][3] + m[2][0] * m[3][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3]"
+                       " - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] - m[3][0] * m[2][2] * m[0][3];\n"
+        "    float cof12 = -(m[0][0] * m[2][1] * m[3][3] + m[2][0] * m[3][1] * m[0][3] + m[3][0] * m[0][1] * m[2][3]"
+                       " - m[0][0] * m[3][1] * m[2][3] - m[2][0] * m[0][1] * m[3][3] - m[3][0] * m[2][1] * m[0][3]);\n"
+        "    float cof13 = m[0][0] * m[2][1] * m[3][2] + m[2][0] * m[3][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2]"
+                       " - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] - m[3][0] * m[2][1] * m[0][2];\n"
+        "    float cof20 = m[0][1] * m[1][2] * m[3][3] + m[1][1] * m[3][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3]"
+                       " - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] - m[3][1] * m[1][2] * m[0][3];\n"
+        "    float cof21 = -(m[0][0] * m[1][2] * m[3][3] + m[1][0] * m[3][2] * m[0][3] + m[3][0] * m[0][2] * m[1][3]"
+                       " - m[0][0] * m[3][2] * m[1][3] - m[1][0] * m[0][2] * m[3][3] - m[3][0] * m[1][2] * m[0][3]);\n"
+        "    float cof22 = m[0][0] * m[1][1] * m[3][3] + m[1][0] * m[3][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3]"
+                       " - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] - m[3][0] * m[1][1] * m[0][3];\n"
+        "    float cof23 = -(m[0][0] * m[1][1] * m[3][2] + m[1][0] * m[3][1] * m[0][2] + m[3][0] * m[0][1] * m[1][2]"
+                       " - m[0][0] * m[3][1] * m[1][2] - m[1][0] * m[0][1] * m[3][2] - m[3][0] * m[1][1] * m[0][2]);\n"
+        "    float cof30 = -(m[0][1] * m[1][2] * m[2][3] + m[1][1] * m[2][2] * m[0][3] + m[2][1] * m[0][2] * m[1][3]"
+                       " - m[0][1] * m[2][2] * m[1][3] - m[1][1] * m[0][2] * m[2][3] - m[2][1] * m[1][2] * m[0][3]);\n"
+        "    float cof31 = m[0][0] * m[1][2] * m[2][3] + m[1][0] * m[2][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3]"
+                       " - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] - m[2][0] * m[1][2] * m[0][3];\n"
+        "    float cof32 = -(m[0][0] * m[1][1] * m[2][3] + m[1][0] * m[2][1] * m[0][3] + m[2][0] * m[0][1] * m[1][3]"
+                       " - m[0][0] * m[2][1] * m[1][3] - m[1][0] * m[0][1] * m[2][3] - m[2][0] * m[1][1] * m[0][3]);\n"
+        "    float cof33 = m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2]"
+                       " - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] - m[2][0] * m[1][1] * m[0][2];\n"
+        "    float4x4 cof = { cof00, cof10, cof20, cof30, cof01, cof11, cof21, cof31,"
+                            " cof02, cof12, cof22, cof32, cof03, cof13, cof23, cof33 };\n"
+        "    return cof / determinant(transpose(m));\n"
+        "}\n");
 }
 
 void BuiltInFunctionEmulatorHLSL::OutputEmulatedFunctionDefinition(
diff --git a/src/compiler/translator/Initialize.cpp b/src/compiler/translator/Initialize.cpp
index aad374f..36c2f4a 100644
--- a/src/compiler/translator/Initialize.cpp
+++ b/src/compiler/translator/Initialize.cpp
@@ -338,6 +338,10 @@
     symbolTable.insertBuiltIn(ESSL3_BUILTINS, float1, "determinant", mat3);
     symbolTable.insertBuiltIn(ESSL3_BUILTINS, float1, "determinant", mat4);
 
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2, "inverse", mat2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3, "inverse", mat3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4, "inverse", mat4);
+
     TType *bool1 = new TType(EbtBool);
     TType *bool2 = new TType(EbtBool, 2);
     TType *bool3 = new TType(EbtBool, 3);
@@ -809,6 +813,7 @@
     symbolTable.relateToOperator(ESSL3_BUILTINS, "outerProduct",  EOpOuterProduct);
     symbolTable.relateToOperator(ESSL3_BUILTINS, "transpose",     EOpTranspose);
     symbolTable.relateToOperator(ESSL3_BUILTINS, "determinant",   EOpDeterminant);
+    symbolTable.relateToOperator(ESSL3_BUILTINS, "inverse",       EOpInverse);
 
     symbolTable.relateToOperator(COMMON_BUILTINS, "any",          EOpAny);
     symbolTable.relateToOperator(COMMON_BUILTINS, "all",          EOpAll);
diff --git a/src/compiler/translator/IntermNode.h b/src/compiler/translator/IntermNode.h
index 3b97872..722765e 100644
--- a/src/compiler/translator/IntermNode.h
+++ b/src/compiler/translator/IntermNode.h
@@ -153,6 +153,7 @@
     EOpOuterProduct,
     EOpTranspose,
     EOpDeterminant,
+    EOpInverse,
 
     EOpAny,
     EOpAll,
diff --git a/src/compiler/translator/OutputGLSLBase.cpp b/src/compiler/translator/OutputGLSLBase.cpp
index 5553960..23dcef3 100644
--- a/src/compiler/translator/OutputGLSLBase.cpp
+++ b/src/compiler/translator/OutputGLSLBase.cpp
@@ -522,6 +522,9 @@
       case EOpDeterminant:
         preString = "determinant(";
         break;
+      case EOpInverse:
+        preString = "inverse(";
+        break;
 
       case EOpAny:
         preString = "any(";
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index 2af8c4e..40f8bd2 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -1669,6 +1669,10 @@
         break;
       case EOpTranspose:        outputTriplet(visit, "transpose(", "", ")");   break;
       case EOpDeterminant:      outputTriplet(visit, "determinant(transpose(", "", "))"); break;
+      case EOpInverse:
+        ASSERT(node->getUseEmulatedFunction());
+        writeEmulatedFunctionTriplet(visit, "inverse(");
+        break;
 
       case EOpAny:              outputTriplet(visit, "any(", "", ")");       break;
       case EOpAll:              outputTriplet(visit, "all(", "", ")");       break;
diff --git a/src/compiler/translator/intermOut.cpp b/src/compiler/translator/intermOut.cpp
index 0138a8f..0a211a5 100644
--- a/src/compiler/translator/intermOut.cpp
+++ b/src/compiler/translator/intermOut.cpp
@@ -310,6 +310,7 @@
 
       case EOpDeterminant:    out << "determinant";          break;
       case EOpTranspose:      out << "transpose";            break;
+      case EOpInverse:        out << "inverse";              break;
 
       case EOpAny:            out << "any";                  break;
       case EOpAll:            out << "all";                  break;