Matrix determinant and inverse implementation

The determinant is implemented directly in ShaderCore so that
OutputASM does not have to allocate temporaries manually.

For now, the inverse matrix implementation is deliberately
simple: it does not reuse results from the cofactor matrix
computation to compute the determinant, nor does it apply any
other optimization, but it works.

Change-Id: I0fc70133809ae2752dc567bf58b60d7af7a88009
Reviewed-on: https://swiftshader-review.googlesource.com/4000
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp
index 406b038..6ad3953 100644
--- a/src/Shader/ShaderCore.cpp
+++ b/src/Shader/ShaderCore.cpp
@@ -1139,6 +1139,34 @@
 		Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw);
 	}
 
+	void ShaderCore::det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
+	{	// 2x2 determinant of the matrix formed by the x/y components of src0 and src1.
+		Float4 det = src0.x * src1.y - src0.y * src1.x;   // evaluated in full before dst is touched
+		dst.x = det; dst.y = det; dst.z = det; dst.w = det;   // same value stored in every component of dst
+	}
+
+	void ShaderCore::det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
+	{	// 3x3 determinant as src0 . (src1 x src2), assuming crs is the cross product and dp3 the 3-component dot product (neither is defined in this hunk).
+		crs(dst, src1, src2);   // dst doubles as scratch storage for the intermediate result
+		dp3(dst, dst, src0);   // NOTE(review): src0 is read only after dst was written above - confirm callers never pass dst aliasing src0
+	}
+
+	void ShaderCore::det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3)
+	{	// 4x4 determinant of the matrix formed by src0..src3, by cofactor expansion along src0; the result is stored in every component of dst.
+		Float4 m0 = src2.z * src3.w - src2.w * src3.z;   // z/w 2x2 minors shared by the src0.x and src0.y cofactors;
+		Float4 m1 = src1.w * src3.z - src1.z * src3.w;   // kept in locals instead of dst.x/y/z so that no source component
+		Float4 m2 = src1.z * src2.w - src1.w * src2.z;   // is overwritten before it is read when dst aliases an argument
+		Float4 det = src0.x * (src1.y * m0 + src2.y * m1 + src3.y * m2) -
+		             src0.y * (src1.x * m0 + src2.x * m1 + src3.x * m2) +
+		             src0.z * (src1.x * (src2.y * src3.w - src2.w * src3.y) +
+		                       src2.x * (src1.w * src3.y - src1.y * src3.w) +
+		                       src3.x * (src1.y * src2.w - src1.w * src2.y)) +
+		             src0.w * (src1.x * (src2.z * src3.y - src2.y * src3.z) +
+		                       src2.x * (src1.y * src3.z - src1.z * src3.y) +
+		                       src3.x * (src1.z * src2.y - src1.y * src2.z));
+		dst.x = det; dst.y = det; dst.z = det; dst.w = det;   // dst is written only after every source has been read
+	}
+
 	void ShaderCore::frc(Vector4f &dst, const Vector4f &src)
 	{
 		dst.x = Frac(src.x);