FVP: Fix function for translating MPIDR to linear index

The current AArch32 version of plat_arm_calc_core_pos uses an incorrect
algorithm to calculate the linear position of a core / PE from its
MPIDR: the ClusterId term is scaled only by FVP_MAX_CPUS_PER_CLUSTER
and not also by FVP_MAX_PE_PER_CPU.

This patch corrects the algorithm to:

(ClusterId * FVP_MAX_CPUS_PER_CLUSTER) * FVP_MAX_PE_PER_CPU
+ (CPUId * FVP_MAX_PE_PER_CPU)
+ ThreadId

which supports configurations with more than one PE per CPU.

NOTE: the AArch64 version was fixed in 39b21d1

Change-Id: I72aea89d8f72f8b1fef54e2177a0fa6fef0f5513
Signed-off-by: David Cunado <david.cunado@arm.com>
diff --git a/plat/arm/board/fvp/aarch32/fvp_helpers.S b/plat/arm/board/fvp/aarch32/fvp_helpers.S
index 143972d..5d88546 100644
--- a/plat/arm/board/fvp/aarch32/fvp_helpers.S
+++ b/plat/arm/board/fvp/aarch32/fvp_helpers.S
@@ -104,15 +104,20 @@
 	bx	lr
 endfunc plat_is_my_cpu_primary
 
-	/* -----------------------------------------------------
+	/* ---------------------------------------------------------------------
 	 * unsigned int plat_arm_calc_core_pos(u_register_t mpidr)
 	 *
 	 * Function to calculate the core position on FVP.
 	 *
-	 * (ClusterId * FVP_MAX_CPUS_PER_CLUSTER) +
+	 * (ClusterId * FVP_MAX_CPUS_PER_CLUSTER * FVP_MAX_PE_PER_CPU) +
 	 * (CPUId * FVP_MAX_PE_PER_CPU) +
 	 * ThreadId
-	 * -----------------------------------------------------
+	 *
+	 * which can be simplified as:
+	 *
+	 * ((ClusterId * FVP_MAX_CPUS_PER_CLUSTER + CPUId) * FVP_MAX_PE_PER_CPU)
+	 * + ThreadId
+	 * ---------------------------------------------------------------------
 	 */
 func plat_arm_calc_core_pos
 	mov	r3, r0
@@ -125,14 +130,15 @@
 	lsleq	r3, r0, #MPIDR_AFFINITY_BITS
 
 	/* Extract individual affinity fields from MPIDR */
-	mov	r2, #FVP_MAX_PE_PER_CPU
 	ubfx	r0, r3, #MPIDR_AFF0_SHIFT, #MPIDR_AFFINITY_BITS
 	ubfx	r1, r3, #MPIDR_AFF1_SHIFT, #MPIDR_AFFINITY_BITS
-	mla	r0, r1, r2, r0
-
-	mov	r1, #FVP_MAX_CPUS_PER_CLUSTER
 	ubfx	r2, r3, #MPIDR_AFF2_SHIFT, #MPIDR_AFFINITY_BITS
-	mla	r0, r1, r2, r0
+
+	/* Compute linear position */
+	mov	r3, #FVP_MAX_CPUS_PER_CLUSTER
+	mla	r1, r2, r3, r1
+	mov	r3, #FVP_MAX_PE_PER_CPU
+	mla	r0, r1, r3, r0
 
 	bx	lr
 endfunc plat_arm_calc_core_pos