Implement matrix inverse built-in

Inverse is emulated in HLSL by calculating the cofactor matrix and
dividing that with the determinant. This results in the transpose of the
inverse as is required. Better performing options might exist especially
for 4x4 matrices, but this is enough for a working implementation.

BUG=angle:859

Change-Id: I5185797cc1ed86865f5f4342707abdc2977a186b
Reviewed-on: https://chromium-review.googlesource.com/240331
Tested-by: Olli Etuaho <oetuaho@nvidia.com>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>

commit: abf6dadd4983531c5cb8e28c0e3efbda3d283f0e [log] [tgz]
author: Olli Etuaho <oetuaho@nvidia.com> Wed Jan 14 14:45:16 2015 +0200
committer: Jamie Madill <jmadill@chromium.org> Fri Jan 16 15:41:40 2015 +0000
tree: f8480a870e424fcca72c30eb33d6f7475065d011
parent: d68157fc4b0dddf56f7d11a230a96e898cdb4750 [diff]
diff --git a/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp b/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp
index b39f257..8e3688f 100644
--- a/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp
+++ b/src/compiler/translator/BuiltInFunctionEmulatorHLSL.cpp

@@ -149,7 +149,7 @@
         "}\n");
     AddEmulatedFunction(EOpAsinh, float2,
         "float2 webgl_asinh_emu(in float2 x) {\n"
-        "    return log(x + sqrt(pow(x, 2.0) + 1.0));"
+        "    return log(x + sqrt(pow(x, 2.0) + 1.0));\n"
         "}\n");
     AddEmulatedFunction(EOpAsinh, float3,
         "float3 webgl_asinh_emu(in float3 x) {\n"
@@ -240,6 +240,87 @@
         "float4x3 webgl_outerProduct_emu(in float3 c, in float4 r) {\n"
         "    return mul(float4x1(r), float1x3(c));\n"
         "}\n");
+
+    TType mat2(EbtFloat, 2, 2);
+    TType mat3(EbtFloat, 3, 3);
+    TType mat4(EbtFloat, 4, 4);
+
+    // Remember here that the parameter matrix is actually the transpose
+    // of the matrix that we're trying to invert, and the resulting matrix
+    // should also be the transpose of the inverse.
+
+    // When accessing the parameter matrix with m[a][b], a can be thought of as
+    // the column and b as the row of the matrix that we're inverting.
+
+    // We calculate the inverse as the adjugate matrix divided by the
+    // determinant of the matrix being inverted. However, as the result needs
+    // to be transposed, we actually use the transpose of the adjugate matrix,
+    // which happens to be the cofactor matrix. That's stored in "cof".
+
+    // We don't need to care about divide-by-zero since results are undefined
+    // for singular or poorly-conditioned matrices.
+
+    AddEmulatedFunction(EOpInverse, mat2,
+        "float2x2 webgl_inverse_emu(in float2x2 m) {\n"
+        "    float2x2 cof = { m[1][1], -m[0][1], -m[1][0], m[0][0] };\n"
+        "    return cof / determinant(transpose(m));\n"
+        "}\n");
+
+    // cofAB is the cofactor for column A and row B.
+
+    AddEmulatedFunction(EOpInverse, mat3,
+        "float3x3 webgl_inverse_emu(in float3x3 m) {\n"
+        "    float cof00 = m[1][1] * m[2][2] - m[2][1] * m[1][2];\n"
+        "    float cof01 = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]);\n"
+        "    float cof02 = m[1][0] * m[2][1] - m[2][0] * m[1][1];\n"
+        "    float cof10 = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]);\n"
+        "    float cof11 = m[0][0] * m[2][2] - m[2][0] * m[0][2];\n"
+        "    float cof12 = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]);\n"
+        "    float cof20 = m[0][1] * m[1][2] - m[1][1] * m[0][2];\n"
+        "    float cof21 = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]);\n"
+        "    float cof22 = m[0][0] * m[1][1] - m[1][0] * m[0][1];\n"
+        "    float3x3 cof = { cof00, cof10, cof20, cof01, cof11, cof21, cof02, cof12, cof22 };\n"
+        "    return cof / determinant(transpose(m));\n"
+        "}\n");
+
+    AddEmulatedFunction(EOpInverse, mat4,
+        "float4x4 webgl_inverse_emu(in float4x4 m) {\n"
+        "    float cof00 = m[1][1] * m[2][2] * m[3][3] + m[2][1] * m[3][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3]"
+                       " - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] - m[3][1] * m[2][2] * m[1][3];\n"
+        "    float cof01 = -(m[1][0] * m[2][2] * m[3][3] + m[2][0] * m[3][2] * m[1][3] + m[3][0] * m[1][2] * m[2][3]"
+                       " - m[1][0] * m[3][2] * m[2][3] - m[2][0] * m[1][2] * m[3][3] - m[3][0] * m[2][2] * m[1][3]);\n"
+        "    float cof02 = m[1][0] * m[2][1] * m[3][3] + m[2][0] * m[3][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3]"
+                       " - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] - m[3][0] * m[2][1] * m[1][3];\n"
+        "    float cof03 = -(m[1][0] * m[2][1] * m[3][2] + m[2][0] * m[3][1] * m[1][2] + m[3][0] * m[1][1] * m[2][2]"
+                       " - m[1][0] * m[3][1] * m[2][2] - m[2][0] * m[1][1] * m[3][2] - m[3][0] * m[2][1] * m[1][2]);\n"
+        "    float cof10 = -(m[0][1] * m[2][2] * m[3][3] + m[2][1] * m[3][2] * m[0][3] + m[3][1] * m[0][2] * m[2][3]"
+                       " - m[0][1] * m[3][2] * m[2][3] - m[2][1] * m[0][2] * m[3][3] - m[3][1] * m[2][2] * m[0][3]);\n"
+        "    float cof11 = m[0][0] * m[2][2] * m[3][3] + m[2][0] * m[3][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3]"
+                       " - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] - m[3][0] * m[2][2] * m[0][3];\n"
+        "    float cof12 = -(m[0][0] * m[2][1] * m[3][3] + m[2][0] * m[3][1] * m[0][3] + m[3][0] * m[0][1] * m[2][3]"
+                       " - m[0][0] * m[3][1] * m[2][3] - m[2][0] * m[0][1] * m[3][3] - m[3][0] * m[2][1] * m[0][3]);\n"
+        "    float cof13 = m[0][0] * m[2][1] * m[3][2] + m[2][0] * m[3][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2]"
+                       " - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] - m[3][0] * m[2][1] * m[0][2];\n"
+        "    float cof20 = m[0][1] * m[1][2] * m[3][3] + m[1][1] * m[3][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3]"
+                       " - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] - m[3][1] * m[1][2] * m[0][3];\n"
+        "    float cof21 = -(m[0][0] * m[1][2] * m[3][3] + m[1][0] * m[3][2] * m[0][3] + m[3][0] * m[0][2] * m[1][3]"
+                       " - m[0][0] * m[3][2] * m[1][3] - m[1][0] * m[0][2] * m[3][3] - m[3][0] * m[1][2] * m[0][3]);\n"
+        "    float cof22 = m[0][0] * m[1][1] * m[3][3] + m[1][0] * m[3][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3]"
+                       " - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] - m[3][0] * m[1][1] * m[0][3];\n"
+        "    float cof23 = -(m[0][0] * m[1][1] * m[3][2] + m[1][0] * m[3][1] * m[0][2] + m[3][0] * m[0][1] * m[1][2]"
+                       " - m[0][0] * m[3][1] * m[1][2] - m[1][0] * m[0][1] * m[3][2] - m[3][0] * m[1][1] * m[0][2]);\n"
+        "    float cof30 = -(m[0][1] * m[1][2] * m[2][3] + m[1][1] * m[2][2] * m[0][3] + m[2][1] * m[0][2] * m[1][3]"
+                       " - m[0][1] * m[2][2] * m[1][3] - m[1][1] * m[0][2] * m[2][3] - m[2][1] * m[1][2] * m[0][3]);\n"
+        "    float cof31 = m[0][0] * m[1][2] * m[2][3] + m[1][0] * m[2][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3]"
+                       " - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] - m[2][0] * m[1][2] * m[0][3];\n"
+        "    float cof32 = -(m[0][0] * m[1][1] * m[2][3] + m[1][0] * m[2][1] * m[0][3] + m[2][0] * m[0][1] * m[1][3]"
+                       " - m[0][0] * m[2][1] * m[1][3] - m[1][0] * m[0][1] * m[2][3] - m[2][0] * m[1][1] * m[0][3]);\n"
+        "    float cof33 = m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2]"
+                       " - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] - m[2][0] * m[1][1] * m[0][2];\n"
+        "    float4x4 cof = { cof00, cof10, cof20, cof30, cof01, cof11, cof21, cof31,"
+                            " cof02, cof12, cof22, cof32, cof03, cof13, cof23, cof33 };\n"
+        "    return cof / determinant(transpose(m));\n"
+        "}\n");
 }
 
 void BuiltInFunctionEmulatorHLSL::OutputEmulatedFunctionDefinition(

diff --git a/src/compiler/translator/Initialize.cpp b/src/compiler/translator/Initialize.cpp
index aad374f..36c2f4a 100644
--- a/src/compiler/translator/Initialize.cpp
+++ b/src/compiler/translator/Initialize.cpp

@@ -338,6 +338,10 @@
     symbolTable.insertBuiltIn(ESSL3_BUILTINS, float1, "determinant", mat3);
     symbolTable.insertBuiltIn(ESSL3_BUILTINS, float1, "determinant", mat4);
 
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat2, "inverse", mat2);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat3, "inverse", mat3);
+    symbolTable.insertBuiltIn(ESSL3_BUILTINS, mat4, "inverse", mat4);
+
     TType *bool1 = new TType(EbtBool);
     TType *bool2 = new TType(EbtBool, 2);
     TType *bool3 = new TType(EbtBool, 3);
@@ -809,6 +813,7 @@
     symbolTable.relateToOperator(ESSL3_BUILTINS, "outerProduct",  EOpOuterProduct);
     symbolTable.relateToOperator(ESSL3_BUILTINS, "transpose",     EOpTranspose);
     symbolTable.relateToOperator(ESSL3_BUILTINS, "determinant",   EOpDeterminant);
+    symbolTable.relateToOperator(ESSL3_BUILTINS, "inverse",       EOpInverse);
 
     symbolTable.relateToOperator(COMMON_BUILTINS, "any",          EOpAny);
     symbolTable.relateToOperator(COMMON_BUILTINS, "all",          EOpAll);

diff --git a/src/compiler/translator/IntermNode.h b/src/compiler/translator/IntermNode.h
index 3b97872..722765e 100644
--- a/src/compiler/translator/IntermNode.h
+++ b/src/compiler/translator/IntermNode.h

@@ -153,6 +153,7 @@
     EOpOuterProduct,
     EOpTranspose,
     EOpDeterminant,
+    EOpInverse,
 
     EOpAny,
     EOpAll,

diff --git a/src/compiler/translator/OutputGLSLBase.cpp b/src/compiler/translator/OutputGLSLBase.cpp
index 5553960..23dcef3 100644
--- a/src/compiler/translator/OutputGLSLBase.cpp
+++ b/src/compiler/translator/OutputGLSLBase.cpp

@@ -522,6 +522,9 @@
       case EOpDeterminant:
         preString = "determinant(";
         break;
+      case EOpInverse:
+        preString = "inverse(";
+        break;
 
       case EOpAny:
         preString = "any(";

diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index 2af8c4e..40f8bd2 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp

@@ -1669,6 +1669,10 @@
         break;
       case EOpTranspose:        outputTriplet(visit, "transpose(", "", ")");   break;
       case EOpDeterminant:      outputTriplet(visit, "determinant(transpose(", "", "))"); break;
+      case EOpInverse:
+        ASSERT(node->getUseEmulatedFunction());
+        writeEmulatedFunctionTriplet(visit, "inverse(");
+        break;
 
       case EOpAny:              outputTriplet(visit, "any(", "", ")");       break;
       case EOpAll:              outputTriplet(visit, "all(", "", ")");       break;

diff --git a/src/compiler/translator/intermOut.cpp b/src/compiler/translator/intermOut.cpp
index 0138a8f..0a211a5 100644
--- a/src/compiler/translator/intermOut.cpp
+++ b/src/compiler/translator/intermOut.cpp

@@ -310,6 +310,7 @@
 
       case EOpDeterminant:    out << "determinant";          break;
       case EOpTranspose:      out << "transpose";            break;
+      case EOpInverse:        out << "inverse";              break;
 
       case EOpAny:            out << "any";                  break;
       case EOpAll:            out << "all";                  break;
commit	abf6dadd4983531c5cb8e28c0e3efbda3d283f0e	[log] [tgz]
author	Olli Etuaho <oetuaho@nvidia.com>	Wed Jan 14 14:45:16 2015 +0200
committer	Jamie Madill <jmadill@chromium.org>	Fri Jan 16 15:41:40 2015 +0000
tree	f8480a870e424fcca72c30eb33d6f7475065d011
parent	d68157fc4b0dddf56f7d11a230a96e898cdb4750 [diff]