PSCI: Optimize call paths if all participants are cache-coherent

The current PSCI implementation can apply certain optimizations upon the
assumption that all PSCI participants are cache-coherent.

  - Skip performing cache maintenance during power-up.

  - Skip performing cache maintenance during power-down:

    At present, on the power-down path, CPU driver disables caches and
    MMU, and performs cache maintenance in preparation for powering down
    the CPU. This means that PSCI must perform additional cache
    maintenance on the extant stack for correct functioning.

    If all participating CPUs are cache-coherent, CPU driver would
    neither disable MMU nor perform cache maintenance. The CPU being
    powered down, therefore, remain cache-coherent throughout all PSCI
    call paths. This in turn means that PSCI cache maintenance
    operations are not required during power down.

  - Choose spin locks instead of bakery locks:

    The current PSCI implementation must synchronize both cache-coherent
    and non-cache-coherent participants. Mutual exclusion primitives are
    not guaranteed to function on non-coherent memory. For this reason,
    the current PSCI implementation had to resort to bakery locks.

    If all participants are cache-coherent, the implementation can
    enable MMU and data caches early, and substitute bakery locks for
    spin locks. Spin locks make use of architectural mutual exclusion
    primitives, and are lighter and faster.

The optimizations are applied when HW_ASSISTED_COHERENCY build option is
enabled, as it's expected that all PSCI participants are cache-coherent
in those systems.

Change-Id: Iac51c3ed318ea7e2120f6b6a46fd2db2eae46ede
Signed-off-by: Jeenu Viswambharan <jeenu.viswambharan@arm.com>
diff --git a/lib/psci/psci_private.h b/lib/psci/psci_private.h
index 7f0204a..a27e215 100644
--- a/lib/psci/psci_private.h
+++ b/lib/psci/psci_private.h
@@ -39,6 +39,7 @@
 #include <spinlock.h>
 
 #if HW_ASSISTED_COHERENCY
+
 /*
  * On systems with hardware-assisted coherency, make PSCI cache operations NOP,
  * as PSCI participants are cache-coherent, and there's no need for explicit
@@ -49,7 +50,21 @@
 #define psci_inv_cpu_data(member)
 
 #define psci_dsbish()
+
+/*
+ * On systems where participant CPUs are cache-coherent, we can use spinlocks
+ * instead of bakery locks.
+ */
+#define DEFINE_PSCI_LOCK(_name)		spinlock_t _name
+#define DECLARE_PSCI_LOCK(_name)	extern DEFINE_PSCI_LOCK(_name)
+
+#define psci_lock_get(non_cpu_pd_node)				\
+	spin_lock(&psci_locks[(non_cpu_pd_node)->lock_index])
+#define psci_lock_release(non_cpu_pd_node)			\
+	spin_unlock(&psci_locks[(non_cpu_pd_node)->lock_index])
+
 #else
+
 /*
  * If not all PSCI participants are cache-coherent, perform cache maintenance
  * and issue barriers wherever required to coordinate state.
@@ -59,19 +74,24 @@
 #define psci_inv_cpu_data(member)		inv_cpu_data(member)
 
 #define psci_dsbish()				dsbish()
-#endif
 
 /*
- * The following helper macros abstract the interface to the Bakery
- * Lock API.
+ * Use bakery locks for state coordination as not all PSCI participants are
+ * cache coherent.
  */
-#define psci_lock_init(non_cpu_pd_node, idx)			\
-	((non_cpu_pd_node)[(idx)].lock_index = (idx))
+#define DEFINE_PSCI_LOCK(_name)		DEFINE_BAKERY_LOCK(_name)
+#define DECLARE_PSCI_LOCK(_name)	DECLARE_BAKERY_LOCK(_name)
+
 #define psci_lock_get(non_cpu_pd_node)				\
 	bakery_lock_get(&psci_locks[(non_cpu_pd_node)->lock_index])
 #define psci_lock_release(non_cpu_pd_node)			\
 	bakery_lock_release(&psci_locks[(non_cpu_pd_node)->lock_index])
 
+#endif
+
+#define psci_lock_init(non_cpu_pd_node, idx)			\
+	((non_cpu_pd_node)[(idx)].lock_index = (idx))
+
 /*
  * The PSCI capability which are provided by the generic code but does not
  * depend on the platform or spd capabilities.
@@ -189,8 +209,8 @@
 extern cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
 extern unsigned int psci_caps;
 
-/* One bakery lock is required for each non-cpu power domain */
-DECLARE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
+/* One lock is required per non-CPU power domain node */
+DECLARE_PSCI_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
 
 /*******************************************************************************
  * SPD's power management hooks registered with PSCI
@@ -227,6 +247,14 @@
 void psci_print_power_domain_map(void);
 unsigned int psci_is_last_on_cpu(void);
 int psci_spd_migrate_info(u_register_t *mpidr);
+void psci_do_pwrdown_sequence(unsigned int power_level);
+
+/*
+ * CPU power down is directly called only when HW_ASSISTED_COHERENCY is
+ * available. Otherwise, this needs post-call stack maintenance, which is
+ * handled in assembly.
+ */
+void prepare_cpu_pwr_dwn(unsigned int power_level);
 
 /* Private exported functions from psci_on.c */
 int psci_cpu_on_start(u_register_t target_cpu,