hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback
Currently all highres=off timers are run from softirq context, but
HRTIMER_CB_IRQSAFE_NO_SOFTIRQ timers expect to run from irq context.
Fix this up by splitting it similar to the highres=on case.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index ecc8e26..49067f1 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -115,10 +115,8 @@
enum hrtimer_restart (*function)(struct hrtimer *);
struct hrtimer_clock_base *base;
unsigned long state;
-#ifdef CONFIG_HIGH_RES_TIMERS
enum hrtimer_cb_mode cb_mode;
struct list_head cb_entry;
-#endif
#ifdef CONFIG_TIMER_STATS
void *start_site;
char start_comm[16];
@@ -194,10 +192,10 @@
spinlock_t lock;
struct lock_class_key lock_key;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
+ struct list_head cb_pending;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
int hres_active;
- struct list_head cb_pending;
unsigned long nr_events;
#endif
};
@@ -319,6 +317,7 @@
/* Soft interrupt function to run the hrtimer queues: */
extern void hrtimer_run_queues(void);
+extern void hrtimer_run_pending(void);
/* Bootup initialization: */
extern void __init hrtimers_init(void);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9f850ca..061ae28 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -325,6 +325,22 @@
}
#endif /* BITS_PER_LONG >= 64 */
+/*
+ * Check, whether the timer is on the callback pending list
+ */
+static inline int hrtimer_cb_pending(const struct hrtimer *timer)
+{
+ return timer->state & HRTIMER_STATE_PENDING;
+}
+
+/*
+ * Remove a timer from the callback pending list
+ */
+static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
+{
+ list_del_init(&timer->cb_entry);
+}
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -494,29 +510,12 @@
}
/*
- * Check, whether the timer is on the callback pending list
- */
-static inline int hrtimer_cb_pending(const struct hrtimer *timer)
-{
- return timer->state & HRTIMER_STATE_PENDING;
-}
-
-/*
- * Remove a timer from the callback pending list
- */
-static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
-{
- list_del_init(&timer->cb_entry);
-}
-
-/*
* Initialize the high resolution related parts of cpu_base
*/
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
{
base->expires_next.tv64 = KTIME_MAX;
base->hres_active = 0;
- INIT_LIST_HEAD(&base->cb_pending);
}
/*
@@ -524,7 +523,6 @@
*/
static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
{
- INIT_LIST_HEAD(&timer->cb_entry);
}
/*
@@ -618,10 +616,13 @@
{
return 0;
}
-static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; }
-static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
+static inline int hrtimer_reprogram(struct hrtimer *timer,
+ struct hrtimer_clock_base *base)
+{
+ return 0;
+}
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -1001,6 +1002,7 @@
clock_id = CLOCK_MONOTONIC;
timer->base = &cpu_base->clock_base[clock_id];
+ INIT_LIST_HEAD(&timer->cb_entry);
hrtimer_init_timer_hres(timer);
#ifdef CONFIG_TIMER_STATS
@@ -1030,118 +1032,8 @@
}
EXPORT_SYMBOL_GPL(hrtimer_get_res);
-#ifdef CONFIG_HIGH_RES_TIMERS
-
-/*
- * High resolution timer interrupt
- * Called with interrupts disabled
- */
-void hrtimer_interrupt(struct clock_event_device *dev)
+static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
{
- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
- struct hrtimer_clock_base *base;
- ktime_t expires_next, now;
- int i, raise = 0;
-
- BUG_ON(!cpu_base->hres_active);
- cpu_base->nr_events++;
- dev->next_event.tv64 = KTIME_MAX;
-
- retry:
- now = ktime_get();
-
- expires_next.tv64 = KTIME_MAX;
-
- base = cpu_base->clock_base;
-
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
- ktime_t basenow;
- struct rb_node *node;
-
- spin_lock(&cpu_base->lock);
-
- basenow = ktime_add(now, base->offset);
-
- while ((node = base->first)) {
- enum hrtimer_restart (*fn)(struct hrtimer *);
- struct hrtimer *timer;
- int restart;
-
- timer = rb_entry(node, struct hrtimer, node);
-
- if (basenow.tv64 < timer->expires.tv64) {
- ktime_t expires;
-
- expires = ktime_sub(timer->expires,
- base->offset);
- if (expires.tv64 < expires_next.tv64)
- expires_next = expires;
- break;
- }
-
- /* Move softirq callbacks to the pending list */
- if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
- __remove_hrtimer(timer, base,
- HRTIMER_STATE_PENDING, 0);
- list_add_tail(&timer->cb_entry,
- &base->cpu_base->cb_pending);
- raise = 1;
- continue;
- }
-
- __remove_hrtimer(timer, base,
- HRTIMER_STATE_CALLBACK, 0);
- timer_stats_account_hrtimer(timer);
-
- fn = timer->function;
- if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
- /*
- * Used for scheduler timers, avoid lock
- * inversion with rq->lock and tasklist_lock.
- *
- * These timers are required to deal with
- * enqueue expiry themselves and are not
- * allowed to migrate.
- */
- spin_unlock(&cpu_base->lock);
- restart = fn(timer);
- spin_lock(&cpu_base->lock);
- } else
- restart = fn(timer);
-
- /*
- * Note: We clear the CALLBACK bit after
- * enqueue_hrtimer to avoid reprogramming of
- * the event hardware. This happens at the end
- * of this function anyway.
- */
- if (restart != HRTIMER_NORESTART) {
- BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
- enqueue_hrtimer(timer, base, 0);
- }
- timer->state &= ~HRTIMER_STATE_CALLBACK;
- }
- spin_unlock(&cpu_base->lock);
- base++;
- }
-
- cpu_base->expires_next = expires_next;
-
- /* Reprogramming necessary ? */
- if (expires_next.tv64 != KTIME_MAX) {
- if (tick_program_event(expires_next, 0))
- goto retry;
- }
-
- /* Raise softirq ? */
- if (raise)
- raise_softirq(HRTIMER_SOFTIRQ);
-}
-
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
spin_lock_irq(&cpu_base->lock);
while (!list_empty(&cpu_base->cb_pending)) {
@@ -1182,56 +1074,126 @@
spin_unlock_irq(&cpu_base->lock);
}
-#endif /* CONFIG_HIGH_RES_TIMERS */
-
-/*
- * Expire the per base hrtimer-queue:
- */
-static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
- int index)
+static void __run_hrtimer(struct hrtimer *timer)
{
- struct rb_node *node;
- struct hrtimer_clock_base *base = &cpu_base->clock_base[index];
+ struct hrtimer_clock_base *base = timer->base;
+ struct hrtimer_cpu_base *cpu_base = base->cpu_base;
+ enum hrtimer_restart (*fn)(struct hrtimer *);
+ int restart;
- if (!base->first)
- return;
+ __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+ timer_stats_account_hrtimer(timer);
- if (base->get_softirq_time)
- base->softirq_time = base->get_softirq_time();
-
- spin_lock_irq(&cpu_base->lock);
-
- while ((node = base->first)) {
- struct hrtimer *timer;
- enum hrtimer_restart (*fn)(struct hrtimer *);
- int restart;
-
- timer = rb_entry(node, struct hrtimer, node);
- if (base->softirq_time.tv64 <= timer->expires.tv64)
- break;
-
-#ifdef CONFIG_HIGH_RES_TIMERS
- WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
-#endif
- timer_stats_account_hrtimer(timer);
-
- fn = timer->function;
- __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
- spin_unlock_irq(&cpu_base->lock);
-
+ fn = timer->function;
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+ /*
+ * Used for scheduler timers, avoid lock inversion with
+ * rq->lock and tasklist_lock.
+ *
+ * These timers are required to deal with enqueue expiry
+ * themselves and are not allowed to migrate.
+ */
+ spin_unlock(&cpu_base->lock);
+ restart = fn(timer);
+ spin_lock(&cpu_base->lock);
+ } else
restart = fn(timer);
- spin_lock_irq(&cpu_base->lock);
-
- timer->state &= ~HRTIMER_STATE_CALLBACK;
- if (restart != HRTIMER_NORESTART) {
- BUG_ON(hrtimer_active(timer));
- enqueue_hrtimer(timer, base, 0);
- }
+ /*
+ * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
+ * reprogramming of the event hardware. This happens at the end of this
+ * function anyway.
+ */
+ if (restart != HRTIMER_NORESTART) {
+ BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
+ enqueue_hrtimer(timer, base, 0);
}
- spin_unlock_irq(&cpu_base->lock);
+ timer->state &= ~HRTIMER_STATE_CALLBACK;
}
+#ifdef CONFIG_HIGH_RES_TIMERS
+
+/*
+ * High resolution timer interrupt
+ * Called with interrupts disabled
+ */
+void hrtimer_interrupt(struct clock_event_device *dev)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ struct hrtimer_clock_base *base;
+ ktime_t expires_next, now;
+ int i, raise = 0;
+
+ BUG_ON(!cpu_base->hres_active);
+ cpu_base->nr_events++;
+ dev->next_event.tv64 = KTIME_MAX;
+
+ retry:
+ now = ktime_get();
+
+ expires_next.tv64 = KTIME_MAX;
+
+ base = cpu_base->clock_base;
+
+ for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+ ktime_t basenow;
+ struct rb_node *node;
+
+ spin_lock(&cpu_base->lock);
+
+ basenow = ktime_add(now, base->offset);
+
+ while ((node = base->first)) {
+ struct hrtimer *timer;
+
+ timer = rb_entry(node, struct hrtimer, node);
+
+ if (basenow.tv64 < timer->expires.tv64) {
+ ktime_t expires;
+
+ expires = ktime_sub(timer->expires,
+ base->offset);
+ if (expires.tv64 < expires_next.tv64)
+ expires_next = expires;
+ break;
+ }
+
+ /* Move softirq callbacks to the pending list */
+ if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
+ __remove_hrtimer(timer, base,
+ HRTIMER_STATE_PENDING, 0);
+ list_add_tail(&timer->cb_entry,
+ &base->cpu_base->cb_pending);
+ raise = 1;
+ continue;
+ }
+
+ __run_hrtimer(timer);
+ }
+ spin_unlock(&cpu_base->lock);
+ base++;
+ }
+
+ cpu_base->expires_next = expires_next;
+
+ /* Reprogramming necessary ? */
+ if (expires_next.tv64 != KTIME_MAX) {
+ if (tick_program_event(expires_next, 0))
+ goto retry;
+ }
+
+ /* Raise softirq ? */
+ if (raise)
+ raise_softirq(HRTIMER_SOFTIRQ);
+}
+
+static void run_hrtimer_softirq(struct softirq_action *h)
+{
+ run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
+}
+
+#endif /* CONFIG_HIGH_RES_TIMERS */
+
/*
* Called from timer softirq every jiffy, expire hrtimers:
*
@@ -1239,10 +1201,9 @@
* softirq context in case the hrtimer initialization failed or has
* not been done yet.
*/
-void hrtimer_run_queues(void)
+void hrtimer_run_pending(void)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
- int i;
if (hrtimer_hres_active())
return;
@@ -1256,8 +1217,54 @@
* deadlock vs. xtime_lock.
*/
if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
- if (hrtimer_switch_to_hres())
- return;
+ hrtimer_switch_to_hres();
+
+ run_hrtimer_pending(cpu_base);
+}
+
+/*
+ * Called from hardirq context every jiffy
+ */
+static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
+ int index)
+{
+ struct rb_node *node;
+ struct hrtimer_clock_base *base = &cpu_base->clock_base[index];
+
+ if (!base->first)
+ return;
+
+ if (base->get_softirq_time)
+ base->softirq_time = base->get_softirq_time();
+
+ spin_lock(&cpu_base->lock);
+
+ while ((node = base->first)) {
+ struct hrtimer *timer;
+
+ timer = rb_entry(node, struct hrtimer, node);
+ if (base->softirq_time.tv64 <= timer->expires.tv64)
+ break;
+
+ if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
+ __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0);
+ list_add_tail(&timer->cb_entry,
+ &base->cpu_base->cb_pending);
+ continue;
+ }
+
+ __run_hrtimer(timer);
+ }
+ spin_unlock(&cpu_base->lock);
+}
+
+void hrtimer_run_queues(void)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ int i;
+
+ if (hrtimer_hres_active())
+ return;
hrtimer_get_softirq_time(cpu_base);
@@ -1407,6 +1414,7 @@
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
cpu_base->clock_base[i].cpu_base = cpu_base;
+ INIT_LIST_HEAD(&cpu_base->cb_pending);
hrtimer_init_hres(cpu_base);
}
diff --git a/kernel/timer.c b/kernel/timer.c
index 2a00c22..f739dfb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -896,7 +896,7 @@
{
tvec_base_t *base = __get_cpu_var(tvec_bases);
- hrtimer_run_queues();
+ hrtimer_run_pending();
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
@@ -907,6 +907,7 @@
*/
void run_local_timers(void)
{
+ hrtimer_run_queues();
raise_softirq(TIMER_SOFTIRQ);
softlockup_tick();
}