Merge "arm_fpga: Add support for topology self-discovery" into integration
diff --git a/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S b/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S
index aeed310..20120c9 100644
--- a/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S
+++ b/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S
@@ -7,6 +7,8 @@
#include <arch.h>
#include <asm_macros.S>
#include <common/bl_common.h>
+#include "../fpga_private.h"
+
#include <platform_def.h>
.globl plat_get_my_entrypoint
@@ -14,10 +16,10 @@
.globl plat_is_my_cpu_primary
.globl platform_mem_init
.globl plat_my_core_pos
- .globl plat_fpga_calc_core_pos
.globl plat_crash_console_init
.globl plat_crash_console_putc
.globl plat_crash_console_flush
+ .globl plat_fpga_calc_core_pos
/* -----------------------------------------------------------------------
* Indicate a cold boot for every CPU - warm boot is unsupported for the
@@ -34,23 +36,59 @@
* -----------------------------------------------------------------------
*/
func plat_secondary_cold_boot_setup
+
+ /*
+ * Wait for the primary processor to initialise the .BSS segment
+ * to avoid a race condition that would erase fpga_valid_mpids
+ * if it is populated before the C runtime is ready.
+ *
+ * We cannot use the current spin-lock implementation until the
+ * runtime is up and we should not rely on sevl/wfe instructions as
+ * it is optional whether they are implemented or not, so we use
+ * a global variable as lock and wait for the primary processor to
+ * finish the C runtime bring-up.
+ */
+
+ ldr w0, =C_RUNTIME_READY_KEY
+ adrp x1, secondary_core_spinlock
+ add x1, x1, :lo12:secondary_core_spinlock
+1:
+ wfe
+ ldr w2, [x1]
+ cmp w2, w0
+ b.ne 1b
+ /* Prevent reordering of the store into fpga_valid_mpids below */
+ dmb ish
+
+ mov x10, x30
+ bl plat_my_core_pos
+ mov x30, x10
+
+ adrp x4, fpga_valid_mpids
+ add x4, x4, :lo12:fpga_valid_mpids
+ mov x5, #VALID_MPID
+ strb w5, [x4, x0]
+
/*
* Poll the CPU's hold entry until it indicates to jump
* to the entrypoint address.
*/
- bl plat_my_core_pos
- lsl x0, x0, #PLAT_FPGA_HOLD_ENTRY_SHIFT
- ldr x1, =hold_base
- ldr x2, =fpga_sec_entrypoint
+
+ adrp x1, hold_base
+ add x1, x1, :lo12:hold_base
poll_hold_entry:
- ldr x3, [x1, x0]
+ ldr x3, [x1, x0, LSL #PLAT_FPGA_HOLD_ENTRY_SHIFT]
cmp x3, #PLAT_FPGA_HOLD_STATE_GO
b.ne 1f
+
+ adrp x2, fpga_sec_entrypoint
+ add x2, x2, :lo12:fpga_sec_entrypoint
ldr x3, [x2]
br x3
1:
wfe
b poll_hold_entry
+
endfunc plat_secondary_cold_boot_setup
/* -----------------------------------------------------------------------
@@ -73,12 +111,16 @@
endfunc platform_mem_init
func plat_my_core_pos
+ ldr x1, =(MPID_MASK & ~(MPIDR_AFFLVL_MASK << MPIDR_AFF3_SHIFT))
mrs x0, mpidr_el1
+ and x0, x0, x1
b plat_fpga_calc_core_pos
+
endfunc plat_my_core_pos
/* -----------------------------------------------------------------------
- * unsigned int plat_fpga_calc_core_pos(u_register_t mpidr)
+ * unsigned int plat_fpga_calc_core_pos (uint32_t mpid)
+ * Clobber registers: x0 to x5
* -----------------------------------------------------------------------
*/
func plat_fpga_calc_core_pos
@@ -88,6 +130,7 @@
*
* If not set, shift MPIDR to left to make it look as if in a
* multi-threaded implementation.
+ *
*/
tst x0, #MPIDR_MT_MASK
lsl x3, x0, #MPIDR_AFFINITY_BITS
@@ -98,11 +141,13 @@
ubfx x1, x3, #MPIDR_AFF1_SHIFT, #MPIDR_AFFINITY_BITS
ubfx x2, x3, #MPIDR_AFF2_SHIFT, #MPIDR_AFFINITY_BITS
- /* Compute linear position */
mov x4, #FPGA_MAX_CPUS_PER_CLUSTER
- madd x1, x2, x4, x1
mov x5, #FPGA_MAX_PE_PER_CPU
+
+ /* Compute linear position */
+ madd x1, x2, x4, x1
madd x0, x1, x5, x0
+
ret
endfunc plat_fpga_calc_core_pos
diff --git a/plat/arm/board/arm_fpga/fpga_bl31_setup.c b/plat/arm/board/arm_fpga/fpga_bl31_setup.c
index e4b9767..6eeff45 100644
--- a/plat/arm/board/arm_fpga/fpga_bl31_setup.c
+++ b/plat/arm/board/arm_fpga/fpga_bl31_setup.c
@@ -7,23 +7,23 @@
#include <assert.h>
#include <common/fdt_wrappers.h>
+#include <drivers/delay_timer.h>
#include <drivers/generic_delay_timer.h>
-#include <lib/mmio.h>
#include <libfdt.h>
+#include "fpga_private.h"
#include <plat/common/platform.h>
#include <platform_def.h>
-#include "fpga_private.h"
-
static entry_point_info_t bl33_image_ep_info;
+volatile uint32_t secondary_core_spinlock;
uintptr_t plat_get_ns_image_entrypoint(void)
{
#ifdef PRELOADED_BL33_BASE
return PRELOADED_BL33_BASE;
#else
- return 0;
+ return 0ULL;
#endif
}
@@ -35,6 +35,17 @@
void bl31_early_platform_setup2(u_register_t arg0, u_register_t arg1,
u_register_t arg2, u_register_t arg3)
{
+ /* Add this core to the VALID mpids list */
+ fpga_valid_mpids[plat_my_core_pos()] = VALID_MPID;
+
+ /*
+ * Notify the secondary CPUs that the C runtime is ready
+ * so they can announce themselves.
+ */
+ secondary_core_spinlock = C_RUNTIME_READY_KEY;
+ dsbish();
+ sev();
+
fpga_console_init();
bl33_image_ep_info.pc = plat_get_ns_image_entrypoint();
@@ -54,11 +65,37 @@
void bl31_platform_setup(void)
{
- /* Initialize the GIC driver, cpu and distributor interfaces */
- plat_fpga_gic_init();
-
/* Write frequency to CNTCRL and initialize timer */
generic_delay_timer_init();
+
+ /*
+ * Before doing anything else, wait for some time to ensure that
+ * the secondary CPUs have populated the fpga_valid_mpids array.
+ * As the number of secondary cores is unknown and can even be 0,
+ * it is not possible to rely on any signal from them, so use a
+ * delay instead.
+ */
+ mdelay(5);
+
+ /*
+ * On the event of a cold reset issued by, for instance, a reset pin
+ * assertion, we cannot guarantee memory to be initialized to zero.
+ * In such scenario, if the secondary cores reached
+ * plat_secondary_cold_boot_setup before the primary one initialized
+ * .BSS, we could end up having a race condition if the spinlock
+ * was not cleared before.
+ *
+ * Similarly, if there were a reset before the spinlock had been
+ * cleared, the secondary cores would find the lock opened before
+ * .BSS is cleared, causing another race condition.
+ *
+ * So clean the spinlock as soon as we think it is safe to reduce the
+ * chances of any race condition on a reset.
+ */
+ secondary_core_spinlock = 0UL;
+
+ /* Initialize the GIC driver, cpu and distributor interfaces */
+ plat_fpga_gic_init();
}
entry_point_info_t *bl31_plat_get_next_image_ep_info(uint32_t type)
diff --git a/plat/arm/board/arm_fpga/fpga_def.h b/plat/arm/board/arm_fpga/fpga_def.h
index 5f1951f..2884ea6 100644
--- a/plat/arm/board/arm_fpga/fpga_def.h
+++ b/plat/arm/board/arm_fpga/fpga_def.h
@@ -18,6 +18,7 @@
* that are present will still be indexed appropriately regardless of any empty
* entries in the array used to represent the topology.
*/
+
#define FPGA_MAX_CLUSTER_COUNT 4
#define FPGA_MAX_CPUS_PER_CLUSTER 8
#define FPGA_MAX_PE_PER_CPU 4
diff --git a/plat/arm/board/arm_fpga/fpga_private.h b/plat/arm/board/arm_fpga/fpga_private.h
index 7545bd1..46287ad 100644
--- a/plat/arm/board/arm_fpga/fpga_private.h
+++ b/plat/arm/board/arm_fpga/fpga_private.h
@@ -7,12 +7,23 @@
#ifndef FPGA_PRIVATE_H
#define FPGA_PRIVATE_H
-unsigned int plat_fpga_calc_core_pos(u_register_t mpidr);
+#include "../fpga_def.h"
+#include <platform_def.h>
+
+#define C_RUNTIME_READY_KEY (0xaa55aa55)
+#define VALID_MPID (1U)
+
+#ifndef __ASSEMBLER__
+
+extern unsigned char fpga_valid_mpids[PLATFORM_CORE_COUNT];
void fpga_console_init(void);
void plat_fpga_gic_init(void);
void fpga_pwr_gic_on_finish(void);
void fpga_pwr_gic_off(void);
+unsigned int plat_fpga_calc_core_pos(uint32_t mpid);
-#endif
+#endif /* __ASSEMBLER__ */
+
+#endif /* FPGA_PRIVATE_H */
diff --git a/plat/arm/board/arm_fpga/fpga_topology.c b/plat/arm/board/arm_fpga/fpga_topology.c
index a2908d7..7fead86 100644
--- a/plat/arm/board/arm_fpga/fpga_topology.c
+++ b/plat/arm/board/arm_fpga/fpga_topology.c
@@ -5,15 +5,20 @@
*/
#include <arch_helpers.h>
+#include <common/debug.h>
+#include <lib/spinlock.h>
#include "fpga_private.h"
+#include <plat/common/platform.h>
#include <platform_def.h>
-static unsigned char fpga_power_domain_tree_desc[FPGA_MAX_CLUSTER_COUNT + 2];
+unsigned char fpga_power_domain_tree_desc[FPGA_MAX_CLUSTER_COUNT + 2];
+unsigned char fpga_valid_mpids[PLATFORM_CORE_COUNT];
const unsigned char *plat_get_power_domain_tree_desc(void)
{
- int i;
+ unsigned int i;
+
/*
* The highest level is the system level. The next level is constituted
* by clusters and then cores in clusters.
@@ -21,12 +26,26 @@
* This description of the power domain topology is aligned with the CPU
* indices returned by the plat_core_pos_by_mpidr() and plat_my_core_pos()
* APIs.
+ *
+ * A description of the topology tree can be found at
+ * https://trustedfirmware-a.readthedocs.io/en/latest/design/psci-pd-tree.html#design
*/
- fpga_power_domain_tree_desc[0] = 1;
- fpga_power_domain_tree_desc[1] = FPGA_MAX_CLUSTER_COUNT;
- for (i = 0; i < FPGA_MAX_CLUSTER_COUNT; i++) {
- fpga_power_domain_tree_desc[i + 2] = FPGA_MAX_CPUS_PER_CLUSTER * FPGA_MAX_PE_PER_CPU;
+ if (fpga_power_domain_tree_desc[0] == 0U) {
+ /*
+ * As fpga_power_domain_tree_desc[0] == 0, assume that the
+ * Power Domain Topology Tree has not been initialized, so
+ * perform the initialization here.
+ */
+
+ fpga_power_domain_tree_desc[0] = 1U;
+ fpga_power_domain_tree_desc[1] = FPGA_MAX_CLUSTER_COUNT;
+
+ for (i = 0U; i < FPGA_MAX_CLUSTER_COUNT; i++) {
+ fpga_power_domain_tree_desc[2 + i] =
+ (FPGA_MAX_CPUS_PER_CLUSTER *
+ FPGA_MAX_PE_PER_CPU);
+ }
}
return fpga_power_domain_tree_desc;
@@ -34,40 +53,25 @@
int plat_core_pos_by_mpidr(u_register_t mpidr)
{
- unsigned int cluster_id, cpu_id, thread_id;
+ unsigned int core_pos;
- /*
- * The image running on the FPGA may or may not implement
- * multithreading, and it shouldn't be assumed this is consistent
- * across all CPUs.
- * This ensures that any passed mpidr values reflect the status of the
- * primary CPU's MT bit.
- */
+ mpidr &= (MPID_MASK & ~(MPIDR_AFFLVL_MASK << MPIDR_AFF3_SHIFT));
mpidr |= (read_mpidr_el1() & MPIDR_MT_MASK);
- mpidr &= MPID_MASK;
- if (mpidr & MPIDR_MT_MASK) {
- thread_id = MPIDR_AFFLVL0_VAL(mpidr);
- cpu_id = MPIDR_AFFLVL1_VAL(mpidr);
- cluster_id = MPIDR_AFFLVL2_VAL(mpidr);
- } else {
- thread_id = 0;
- cpu_id = MPIDR_AFFLVL0_VAL(mpidr);
- cluster_id = MPIDR_AFFLVL1_VAL(mpidr);
+ if ((MPIDR_AFFLVL2_VAL(mpidr) >= FPGA_MAX_CLUSTER_COUNT) ||
+ (MPIDR_AFFLVL1_VAL(mpidr) >= FPGA_MAX_CPUS_PER_CLUSTER) ||
+ (MPIDR_AFFLVL0_VAL(mpidr) >= FPGA_MAX_PE_PER_CPU)) {
+ ERROR ("Invalid mpidr: 0x%08x\n", (uint32_t)mpidr);
+ panic();
}
- if (cluster_id >= FPGA_MAX_CLUSTER_COUNT) {
+ /* Calculate the core position, based on the maximum topology. */
+ core_pos = plat_fpga_calc_core_pos(mpidr);
+
+ /* Check whether this core is actually present. */
+ if (fpga_valid_mpids[core_pos] != VALID_MPID) {
return -1;
}
- if (cpu_id >= FPGA_MAX_CPUS_PER_CLUSTER) {
- return -1;
- }
-
- if (thread_id >= FPGA_MAX_PE_PER_CPU) {
- return -1;
- }
-
- /* Calculate the correct core, catering for multi-threaded images */
- return (int) plat_fpga_calc_core_pos(mpidr);
+ return core_pos;
}
diff --git a/plat/arm/board/arm_fpga/include/platform_def.h b/plat/arm/board/arm_fpga/include/platform_def.h
index 31fc987..411b936 100644
--- a/plat/arm/board/arm_fpga/include/platform_def.h
+++ b/plat/arm/board/arm_fpga/include/platform_def.h
@@ -21,11 +21,12 @@
#define CACHE_WRITEBACK_SHIFT U(6)
#define CACHE_WRITEBACK_GRANULE (U(1) << CACHE_WRITEBACK_SHIFT)
-#define PLATFORM_CORE_COUNT \
- (FPGA_MAX_CLUSTER_COUNT * FPGA_MAX_CPUS_PER_CLUSTER * FPGA_MAX_PE_PER_CPU)
+#define PLATFORM_CORE_COUNT \
+ (FPGA_MAX_CLUSTER_COUNT * \
+ FPGA_MAX_CPUS_PER_CLUSTER * \
+ FPGA_MAX_PE_PER_CPU)
-#define PLAT_NUM_PWR_DOMAINS (FPGA_MAX_CLUSTER_COUNT + \
- PLATFORM_CORE_COUNT) + 1
+#define PLAT_NUM_PWR_DOMAINS (FPGA_MAX_CLUSTER_COUNT + PLATFORM_CORE_COUNT + 1)
#if !ENABLE_PIE
#define BL31_BASE UL(0x80000000)