Fixed non-square matrix multiplication

A few cases of matrix multiplication weren't working properly
due to using the wrong matrix dimension. For example, if we
have the following matrix operation (using matColRow):
mat32 = mat22 * mat32
This results in 3 multiplications of 2-element vectors for each
row of the result, and the current code (before this CL) would
only perform 2 multiplications and produce the wrong result.

This CL should fix all uses of the * operator and of the
matrixCompMult function when using non-square matrices.

Change-Id: Id0dbfd9d65c20102220049c34435c37e3db7f9da
Reviewed-on: https://swiftshader-review.googlesource.com/3966
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/OpenGL/compiler/OutputASM.cpp b/src/OpenGL/compiler/OutputASM.cpp
index 5a7b108..85b6cca 100644
--- a/src/OpenGL/compiler/OutputASM.cpp
+++ b/src/OpenGL/compiler/OutputASM.cpp
@@ -533,11 +533,12 @@
 		case EOpVectorTimesMatrix:

 			if(visit == PostVisit)

 			{

-				int size = leftType.getNominalSize();

+				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());

 

+				int size = rightType.getNominalSize();

 				for(int i = 0; i < size; i++)

 				{

-					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, left, right);

+					Instruction *dot = emit(dpOpcode, result, left, right);

 					dot->dst.mask = 1 << i;

 					argument(dot->src[1], right, i);

 				}

@@ -549,7 +550,8 @@
 				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);

 				mul->src[1].swizzle = 0x00;

 

-				for(int i = 1; i < leftType.getNominalSize(); i++)

+				int size = rightType.getNominalSize();

+				for(int i = 1; i < size; i++)

 				{

 					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, left, right, result);

 					argument(mad->src[0], left, i);

@@ -562,7 +564,8 @@
 			{

 				int dim = leftType.getNominalSize();

 

-				for(int i = 0; i < dim; i++)

+				int size = rightType.getNominalSize();

+				for(int i = 0; i < size; i++)

 				{

 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);

 					mul->dst.index += i;

@@ -1209,9 +1212,12 @@
 		case EOpMul:

 			if(visit == PostVisit)

 			{

-				ASSERT(dim2(arg[0]) == dim2(arg[1]));

+				TIntermTyped *arg0 = arg[0]->getAsTyped();

+				TIntermTyped *arg1 = arg[1]->getAsTyped();

+				ASSERT((arg0->getNominalSize() == arg1->getNominalSize()) && (arg0->getSecondarySize() == arg1->getSecondarySize()));

 

-				for(int i = 0; i < dim2(arg[0]); i++)

+				int size = arg0->getNominalSize();

+				for(int i = 0; i < size; i++)

 				{

 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, arg[0], arg[1]);

 					mul->dst.index += i;