powerpc: Simplify pSeries idle loop
Since pSeries only wants to do something different in the idle loop when
there is no work to do, we can simplify the code by implementing
ppc_md.power_save functions instead of complete idle loops. There are
two versions: one for shared-processor partitions and one for dedicated-
processor partitions.
With this we also do a cede_processor() call on dedicated processor
partitions if the poll_pending() call indicates that the hypervisor
has work it wants to do.
Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 44d5c7f..213bf98 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -81,8 +81,8 @@
int fwnmi_active; /* TRUE if an FWNMI handler is present */
-static void pseries_shared_idle(void);
-static void pseries_dedicated_idle(void);
+static void pseries_shared_idle_sleep(void);
+static void pseries_dedicated_idle_sleep(void);
struct mpic *pSeries_mpic;
@@ -236,14 +236,13 @@
vpa_init(boot_cpuid);
if (get_lppaca()->shared_proc) {
printk(KERN_INFO "Using shared processor idle loop\n");
- ppc_md.idle_loop = pseries_shared_idle;
+ ppc_md.power_save = pseries_shared_idle_sleep;
} else {
printk(KERN_INFO "Using dedicated idle loop\n");
- ppc_md.idle_loop = pseries_dedicated_idle;
+ ppc_md.power_save = pseries_dedicated_idle_sleep;
}
} else {
printk(KERN_INFO "Using default idle loop\n");
- ppc_md.idle_loop = default_idle;
}
if (firmware_has_feature(FW_FEATURE_LPAR))
@@ -393,136 +392,87 @@
DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-static inline void dedicated_idle_sleep(unsigned int cpu)
-{
- struct lppaca *plppaca = &lppaca[cpu ^ 1];
-
- /* Only sleep if the other thread is not idle */
- if (!(plppaca->idle)) {
- local_irq_disable();
-
- /*
- * We are about to sleep the thread and so wont be polling any
- * more.
- */
- clear_thread_flag(TIF_POLLING_NRFLAG);
- smp_mb__after_clear_bit();
-
- /*
- * SMT dynamic mode. Cede will result in this thread going
- * dormant, if the partner thread is still doing work. Thread
- * wakes up if partner goes idle, an interrupt is presented, or
- * a prod occurs. Returning from the cede enables external
- * interrupts.
- */
- if (!need_resched())
- cede_processor();
- else
- local_irq_enable();
- set_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- /*
- * Give the HV an opportunity at the processor, since we are
- * not doing any work.
- */
- poll_pending();
- }
-}
-
-static void pseries_dedicated_idle(void)
+static void pseries_dedicated_idle_sleep(void)
{
unsigned int cpu = smp_processor_id();
unsigned long start_snooze;
unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
- set_thread_flag(TIF_POLLING_NRFLAG);
- while (1) {
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- get_lppaca()->idle = 1;
+ /*
+ * Indicate to the HV that we are idle. Now would be
+ * a good time to find other work to dispatch.
+ */
+ get_lppaca()->idle = 1;
- if (!need_resched()) {
- start_snooze = get_tb() +
- *smt_snooze_delay * tb_ticks_per_usec;
+ /*
+ * We come in with interrupts disabled, and need_resched()
+ * has been checked recently. If we should poll for a little
+ * while, do so.
+ */
+ if (*smt_snooze_delay) {
+ start_snooze = get_tb() +
+ *smt_snooze_delay * tb_ticks_per_usec;
+ local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
- while (!need_resched() && !cpu_is_offline(cpu)) {
- ppc64_runlatch_off();
-
- /*
- * Go into low thread priority and possibly
- * low power mode.
- */
- HMT_low();
- HMT_very_low();
-
- if (*smt_snooze_delay != 0 &&
- get_tb() > start_snooze) {
- HMT_medium();
- dedicated_idle_sleep(cpu);
- }
-
- }
-
- HMT_medium();
+ while (get_tb() < start_snooze) {
+ if (need_resched() || cpu_is_offline(cpu))
+ goto out;
+ ppc64_runlatch_off();
+ HMT_low();
+ HMT_very_low();
}
- get_lppaca()->idle = 0;
- ppc64_runlatch_on();
-
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
-
- if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
- cpu_die();
+ HMT_medium();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
+ local_irq_disable();
+ if (need_resched() || cpu_is_offline(cpu))
+ goto out;
}
+
+ /*
+ * Cede if the other thread is not idle, so that it can
+ * go single-threaded. If the other thread is idle,
+ * we ask the hypervisor if it has pending work it
+ * wants to do and cede if it does. Otherwise we keep
+ * polling in order to reduce interrupt latency.
+ *
+ * Doing the cede when the other thread is active will
+ * result in this thread going dormant, meaning the other
+ * thread gets to run in single-threaded (ST) mode, which
+ * is slightly faster than SMT mode with this thread at
+ * very low priority. The cede enables interrupts, which
+ * doesn't matter here.
+ */
+ if (!lppaca[cpu ^ 1].idle || poll_pending() == H_Pending)
+ cede_processor();
+
+out:
+ HMT_medium();
+ get_lppaca()->idle = 0;
}
-static void pseries_shared_idle(void)
+static void pseries_shared_idle_sleep(void)
{
unsigned int cpu = smp_processor_id();
- while (1) {
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- get_lppaca()->idle = 1;
+ /*
+ * Indicate to the HV that we are idle. Now would be
+ * a good time to find other work to dispatch.
+ */
+ get_lppaca()->idle = 1;
- while (!need_resched() && !cpu_is_offline(cpu)) {
- local_irq_disable();
- ppc64_runlatch_off();
+ /*
+ * Yield the processor to the hypervisor. We return if
+ * an external interrupt occurs (which are driven prior
+ * to returning here) or if a prod occurs from another
+ * processor. When returning here, external interrupts
+ * are enabled.
+ */
+ cede_processor();
- /*
- * Yield the processor to the hypervisor. We return if
- * an external interrupt occurs (which are driven prior
- * to returning here) or if a prod occurs from another
- * processor. When returning here, external interrupts
- * are enabled.
- *
- * Check need_resched() again with interrupts disabled
- * to avoid a race.
- */
- if (!need_resched())
- cede_processor();
- else
- local_irq_enable();
-
- HMT_medium();
- }
-
- get_lppaca()->idle = 0;
- ppc64_runlatch_on();
-
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
-
- if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
- cpu_die();
- }
+ get_lppaca()->idle = 0;
}
static int pSeries_pci_probe_mode(struct pci_bus *bus)
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h
index 38ca9ad..b72c04f 100644
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -9,6 +9,7 @@
#define H_Closed 2 /* Resource closed */
#define H_Constrained 4 /* Resource request constrained to max allowed */
#define H_InProgress 14 /* Kind of like busy */
+#define H_Pending 17 /* returned from H_POLL_PENDING */
#define H_Continue 18 /* Returned from H_Join on success */
#define H_LongBusyStartRange 9900 /* Start of long busy range */
#define H_LongBusyOrder1msec 9900 /* Long busy, hint that 1msec is a good time to retry */