TF-A: Fix BL31 linker script error

This patch fixes the BL31 linker script error
"Init code ends past the end of the stacks"
seen on platforms with fewer than 4 CPUs. The error
is caused by the __STACKS_END__ address being lower
than __INIT_CODE_END__.
The modified BL31 linker script detects such cases
and increases the total amount of stack memory by
setting __STACKS_END__ = __INIT_CODE_END__; the CPUs'
stacks are then calculated accordingly by the BL31
'plat_get_my_stack' function. For platforms where
__INIT_CODE_END__ < __STACKS_END__ (e.g. those with
more than 4 CPUs), the stack memory is not increased
and the allocated CPUs' stacks match the existing
implementation.
The patch also removes the exclusion of the PSCI
initialization functions from the reclaimed .init
section in the 'arm_reclaim_init.ld.S' script, which
increases the size of the reclaimed memory region.

Change-Id: I927773e00dd84e1ffe72f9ee534f4f2fc7b6153c
Signed-off-by: Alexei Fedorov <Alexei.Fedorov@arm.com>
diff --git a/include/common/bl_common.ld.h b/include/common/bl_common.ld.h
index 97fed72..208e3d6 100644
--- a/include/common/bl_common.ld.h
+++ b/include/common/bl_common.ld.h
@@ -101,12 +101,14 @@
 		__DATA_END__ = .;			\
 	}
 
+#if !(defined(IMAGE_BL31) && RECLAIM_INIT_CODE)
 #define STACK_SECTION					\
 	stacks (NOLOAD) : {				\
 		__STACKS_START__ = .;			\
 		*(tzfw_normal_stacks)			\
 		__STACKS_END__ = .;			\
 	}
+#endif /* !(defined(IMAGE_BL31) && RECLAIM_INIT_CODE) */
 
 /*
  * If BL doesn't use any bakery lock then __PERCPU_BAKERY_LOCK_SIZE__
diff --git a/include/plat/arm/common/arm_reclaim_init.ld.S b/include/plat/arm/common/arm_reclaim_init.ld.S
index b5bf473..03976f3 100644
--- a/include/plat/arm/common/arm_reclaim_init.ld.S
+++ b/include/plat/arm/common/arm_reclaim_init.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2020, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -12,11 +12,7 @@
             . = . + PLATFORM_STACK_SIZE;
             . = ALIGN(PAGE_SIZE);
             __INIT_CODE_START__ = .;
-            /*
-             * Exclude PSCI initialization functions to ensure the init section
-             * does not become larger than the overlaid stack region
-             */
-            *(EXCLUDE_FILE (*psci_setup.o).text.init*)
+	    *(*text.init*);
             __INIT_CODE_UNALIGNED__ = .;
             .  = ALIGN(PAGE_SIZE);
             __INIT_CODE_END__ = .;
@@ -32,4 +28,41 @@
 
 }
 
+#undef	MIN	/* so that MIN() below reaches the linker as written */
+#define	ABS		ABSOLUTE
+#define	COUNT		PLATFORM_CORE_COUNT
+#define	ALIGN_MASK	(~(CACHE_WRITEBACK_GRANULE - 1))
+
+#define PRIMARY_STACK							\
+	__STACKS_START__ = .;						\
+	*(tzfw_normal_stacks)						\
+	OFFSET = ABS(SIZEOF(.init) - (. - __STACKS_START__));		\
+	/* Bit 63 of OFFSET: set when OFFSET is negative */		\
+	SIGN = ABS(OFFSET) & (1 << 63);					\
+	/* MASK: all ones when OFFSET >= 0, zero otherwise */		\
+	MASK = ABS(SIGN >> 63) - 1;					\
+	. +=  ABS(OFFSET) & ABS(MASK);					\
+	__STACKS_END__ = .;						\
+	/* Total stack size */						\
+	SIZE = ABS(. - __STACKS_START__);				\
+	/* 1/COUNT of the total, aligned down to the granule */	\
+	STACK = ABS(__STACKS_START__ + SIZE / COUNT) & ALIGN_MASK;	\
+	/* Primary CPU stack, capped at __INIT_CODE_START__ */		\
+	__PRIMARY_STACK__ = MIN(STACK, ABS(__INIT_CODE_START__));
+
+#if (COUNT > 1)
+#define	SECONDARY_STACK					\
+	/* Memory left for all secondary CPU stacks */	\
+	REST = ABS(__STACKS_END__ - __PRIMARY_STACK__);	\
+	/* Secondary per-CPU stack size */		\
+	__STACK_SIZE__ = ABS(REST / (COUNT - 1));
+#else
+#define	SECONDARY_STACK
+#endif
+
+#define STACK_SECTION		\
+	stacks (NOLOAD) : {	\
+		PRIMARY_STACK	\
+		SECONDARY_STACK	\
+	}
 #endif /* ARM_RECLAIM_INIT_LD_S */
diff --git a/plat/common/aarch64/platform_mp_stack.S b/plat/common/aarch64/platform_mp_stack.S
index f9780e8..e2d71da 100644
--- a/plat/common/aarch64/platform_mp_stack.S
+++ b/plat/common/aarch64/platform_mp_stack.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2020, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -32,9 +32,41 @@
 	 * -----------------------------------------------------
 	 */
 func plat_get_my_stack
-	mov	x10, x30 // lr
+#if (defined(IMAGE_BL31) && RECLAIM_INIT_CODE)
+#if (PLATFORM_CORE_COUNT == 1)
+	/* Single CPU: x30 is untouched, so return through it directly */
+	adrp	x0, __PRIMARY_STACK__
+	add	x0, x0, :lo12:__PRIMARY_STACK__
+	ret
+#else
+	mov	x10, x30		/* bl below overwrites x30 */
+	bl	plat_my_core_pos
+	cbnz	x0, 2f			/* non-zero result: secondary CPU */
+
+	/* Primary CPU */
+	adrp	x0, __PRIMARY_STACK__
+	add	x0, x0, :lo12:__PRIMARY_STACK__
+	ret	x10
+
+	/* Secondary CPU: x0 = result - (PLATFORM_CORE_COUNT - 1) */
+2:	sub	x0, x0, #(PLATFORM_CORE_COUNT - 1)
+	adrp	x1, __STACKS_END__
+	adrp	x2, __STACK_SIZE__
+	add	x1, x1, :lo12:__STACKS_END__
+	add	x2, x2, :lo12:__STACK_SIZE__
+
+	madd	x0, x0, x2, x1		/* x0 = x1 + x0 * x2 */
+	bic	x0, x0, #(CACHE_WRITEBACK_GRANULE - 1)
+	ret	x10
+#endif
+	.quad	platform_normal_stacks	/* full 64-bit address; .word would emit a 32-bit relocation */
+
+#else /* !(IMAGE_BL31 && RECLAIM_INIT_CODE) */
+	mov	x10, x30
 	get_my_mp_stack platform_normal_stacks, PLATFORM_STACK_SIZE
 	ret	x10
+
+#endif /* IMAGE_BL31 && RECLAIM_INIT_CODE */
 endfunc plat_get_my_stack
 
 	/* -----------------------------------------------------
@@ -45,14 +77,14 @@
 	 * -----------------------------------------------------
 	 */
 func plat_set_my_stack
-	mov	x9, x30 // lr
+	mov	x9, x30		/* keep return address: bl below overwrites x30 */
 	bl 	plat_get_my_stack
 	mov	sp, x0
 	ret	x9
 endfunc plat_set_my_stack
 
 	/* -----------------------------------------------------
-	 * Per-cpu stacks in normal memory. Each cpu gets a
+	 * Per-CPU stacks in normal memory. Each CPU gets a
 	 * stack of PLATFORM_STACK_SIZE bytes.
 	 * -----------------------------------------------------
 	 */