blob: aab880677ce0fdf0b94a079e6eff54eed4bfd0a4 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/kernel/softirq.c
3 *
4 * Copyright (C) 1992 Linus Torvalds
5 *
6 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
7 */
8
9#include <linux/module.h>
10#include <linux/kernel_stat.h>
11#include <linux/interrupt.h>
12#include <linux/init.h>
13#include <linux/mm.h>
14#include <linux/notifier.h>
15#include <linux/percpu.h>
16#include <linux/cpu.h>
17#include <linux/kthread.h>
18#include <linux/rcupdate.h>
Andrew Morton78eef012006-03-22 00:08:16 -080019#include <linux/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020
21#include <asm/irq.h>
/*
   - No shared variables; all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     with its own spinlocks.
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding.
     It is still not clear, though, whether this results in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: each tasklet is serialized with respect to itself.
 */
39
#ifndef __ARCH_IRQ_STAT
/*
 * irq_stat array with one cacheline-aligned slot per possible CPU
 * (NR_CPUS entries). Only defined and exported here when
 * __ARCH_IRQ_STAT is not defined.
 */
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

/*
 * Table of softirq handlers, indexed by softirq number. Entries are
 * filled in by open_softirq(); __do_softirq() walks the table in step
 * with the bits of its __u32 pending mask.
 */
static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

/*
 * Per-CPU pointer to the ksoftirqd task. It is set and cleared by
 * cpu_callback() and read by wakeup_softirqd(); it may be NULL.
 */
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
48
49/*
50 * we cannot loop indefinitely here to avoid userspace starvation,
51 * but we also don't want to introduce a worst case 1/HZ latency
52 * to the pending events, so lets the scheduler to balance
53 * the softirq load for us.
54 */
55static inline void wakeup_softirqd(void)
56{
57 /* Interrupts are disabled: no need to stop preemption */
58 struct task_struct *tsk = __get_cpu_var(ksoftirqd);
59
60 if (tsk && tsk->state != TASK_RUNNING)
61 wake_up_process(tsk);
62}
63
64/*
Ingo Molnarde30a2b2006-07-03 00:24:42 -070065 * This one is for softirq.c-internal use,
66 * where hardirqs are disabled legitimately:
67 */
68static void __local_bh_disable(unsigned long ip)
69{
70 unsigned long flags;
71
72 WARN_ON_ONCE(in_irq());
73
74 raw_local_irq_save(flags);
75 add_preempt_count(SOFTIRQ_OFFSET);
76 /*
77 * Were softirqs turned off above:
78 */
79 if (softirq_count() == SOFTIRQ_OFFSET)
80 trace_softirqs_off(ip);
81 raw_local_irq_restore(flags);
82}
83
/*
 * Public entry point: raise the softirq-disable count, recording the
 * address of our caller for the tracing hook.
 */
void local_bh_disable(void)
{
	unsigned long caller = (unsigned long)__builtin_return_address(0);

	__local_bh_disable(caller);
}

EXPORT_SYMBOL(local_bh_disable);
90
91void __local_bh_enable(void)
92{
93 WARN_ON_ONCE(in_irq());
94
95 /*
96 * softirqs should never be enabled by __local_bh_enable(),
97 * it always nests inside local_bh_enable() sections:
98 */
99 WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
100
101 sub_preempt_count(SOFTIRQ_OFFSET);
102}
103EXPORT_SYMBOL_GPL(__local_bh_enable);
104
105/*
106 * Special-case - softirqs can safely be enabled in
107 * cond_resched_softirq(), or by __do_softirq(),
108 * without processing still-pending softirqs:
109 */
110void _local_bh_enable(void)
111{
112 WARN_ON_ONCE(in_irq());
113 WARN_ON_ONCE(!irqs_disabled());
114
115 if (softirq_count() == SOFTIRQ_OFFSET)
116 trace_softirqs_on((unsigned long)__builtin_return_address(0));
117 sub_preempt_count(SOFTIRQ_OFFSET);
118}
119
120EXPORT_SYMBOL(_local_bh_enable);
121
122void local_bh_enable(void)
123{
124 unsigned long flags;
125
126 WARN_ON_ONCE(in_irq());
127 WARN_ON_ONCE(irqs_disabled());
128
129 local_irq_save(flags);
130 /*
131 * Are softirqs going to be turned on now:
132 */
133 if (softirq_count() == SOFTIRQ_OFFSET)
134 trace_softirqs_on((unsigned long)__builtin_return_address(0));
135 /*
136 * Keep preemption disabled until we are done with
137 * softirq processing:
138 */
139 sub_preempt_count(SOFTIRQ_OFFSET - 1);
140
141 if (unlikely(!in_interrupt() && local_softirq_pending()))
142 do_softirq();
143
144 dec_preempt_count();
145 local_irq_restore(flags);
146 preempt_check_resched();
147}
148EXPORT_SYMBOL(local_bh_enable);
149
150void local_bh_enable_ip(unsigned long ip)
151{
152 unsigned long flags;
153
154 WARN_ON_ONCE(in_irq());
155
156 local_irq_save(flags);
157 /*
158 * Are softirqs going to be turned on now:
159 */
160 if (softirq_count() == SOFTIRQ_OFFSET)
161 trace_softirqs_on(ip);
162 /*
163 * Keep preemption disabled until we are done with
164 * softirq processing:
165 */
166 sub_preempt_count(SOFTIRQ_OFFSET - 1);
167
168 if (unlikely(!in_interrupt() && local_softirq_pending()))
169 do_softirq();
170
171 dec_preempt_count();
172 local_irq_restore(flags);
173 preempt_check_resched();
174}
175EXPORT_SYMBOL(local_bh_enable_ip);
176
177/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
179 * and we fall back to softirqd after that.
180 *
181 * This number has been established via experimentation.
182 * The two things to balance is latency against fairness -
183 * we want to handle softirqs as soon as possible, but they
184 * should not be able to lock up the box.
185 */
186#define MAX_SOFTIRQ_RESTART 10
187
188asmlinkage void __do_softirq(void)
189{
190 struct softirq_action *h;
191 __u32 pending;
192 int max_restart = MAX_SOFTIRQ_RESTART;
193 int cpu;
194
195 pending = local_softirq_pending();
Paul Mackerras829035fd2006-07-03 00:25:40 -0700196 account_system_vtime(current);
197
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700198 __local_bh_disable((unsigned long)__builtin_return_address(0));
199 trace_softirq_enter();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 cpu = smp_processor_id();
202restart:
203 /* Reset the pending bitmask before enabling irqs */
Andi Kleen3f744782005-09-12 18:49:24 +0200204 set_softirq_pending(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205
Andrew Mortonc70f5d62005-07-30 10:22:49 -0700206 local_irq_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207
208 h = softirq_vec;
209
210 do {
211 if (pending & 1) {
212 h->action(h);
213 rcu_bh_qsctr_inc(cpu);
214 }
215 h++;
216 pending >>= 1;
217 } while (pending);
218
Andrew Mortonc70f5d62005-07-30 10:22:49 -0700219 local_irq_disable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220
221 pending = local_softirq_pending();
222 if (pending && --max_restart)
223 goto restart;
224
225 if (pending)
226 wakeup_softirqd();
227
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700228 trace_softirq_exit();
Paul Mackerras829035fd2006-07-03 00:25:40 -0700229
230 account_system_vtime(current);
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700231 _local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232}
233
234#ifndef __ARCH_HAS_DO_SOFTIRQ
235
236asmlinkage void do_softirq(void)
237{
238 __u32 pending;
239 unsigned long flags;
240
241 if (in_interrupt())
242 return;
243
244 local_irq_save(flags);
245
246 pending = local_softirq_pending();
247
248 if (pending)
249 __do_softirq();
250
251 local_irq_restore(flags);
252}
253
254EXPORT_SYMBOL(do_softirq);
255
256#endif
257
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
259# define invoke_softirq() __do_softirq()
260#else
261# define invoke_softirq() do_softirq()
262#endif
263
264/*
265 * Exit an interrupt context. Process softirqs if needed and possible:
266 */
267void irq_exit(void)
268{
269 account_system_vtime(current);
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700270 trace_hardirq_exit();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 sub_preempt_count(IRQ_EXIT_OFFSET);
272 if (!in_interrupt() && local_softirq_pending())
273 invoke_softirq();
274 preempt_enable_no_resched();
275}
276
277/*
278 * This function must run with irqs disabled!
279 */
280inline fastcall void raise_softirq_irqoff(unsigned int nr)
281{
282 __raise_softirq_irqoff(nr);
283
284 /*
285 * If we're in an interrupt or softirq, we're done
286 * (this also catches softirq-disabled code). We will
287 * actually run the softirq once we return from
288 * the irq or softirq.
289 *
290 * Otherwise we wake up ksoftirqd to make sure we
291 * schedule the softirq soon.
292 */
293 if (!in_interrupt())
294 wakeup_softirqd();
295}
296
297EXPORT_SYMBOL(raise_softirq_irqoff);
298
299void fastcall raise_softirq(unsigned int nr)
300{
301 unsigned long flags;
302
303 local_irq_save(flags);
304 raise_softirq_irqoff(nr);
305 local_irq_restore(flags);
306}
307
308void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
309{
310 softirq_vec[nr].data = data;
311 softirq_vec[nr].action = action;
312}
313
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314/* Tasklets */
315struct tasklet_head
316{
317 struct tasklet_struct *list;
318};
319
320/* Some compilers disobey section attribute on statics when not
321 initialized -- RR */
322static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
323static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
324
325void fastcall __tasklet_schedule(struct tasklet_struct *t)
326{
327 unsigned long flags;
328
329 local_irq_save(flags);
330 t->next = __get_cpu_var(tasklet_vec).list;
331 __get_cpu_var(tasklet_vec).list = t;
332 raise_softirq_irqoff(TASKLET_SOFTIRQ);
333 local_irq_restore(flags);
334}
335
336EXPORT_SYMBOL(__tasklet_schedule);
337
338void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
339{
340 unsigned long flags;
341
342 local_irq_save(flags);
343 t->next = __get_cpu_var(tasklet_hi_vec).list;
344 __get_cpu_var(tasklet_hi_vec).list = t;
345 raise_softirq_irqoff(HI_SOFTIRQ);
346 local_irq_restore(flags);
347}
348
349EXPORT_SYMBOL(__tasklet_hi_schedule);
350
351static void tasklet_action(struct softirq_action *a)
352{
353 struct tasklet_struct *list;
354
355 local_irq_disable();
356 list = __get_cpu_var(tasklet_vec).list;
357 __get_cpu_var(tasklet_vec).list = NULL;
358 local_irq_enable();
359
360 while (list) {
361 struct tasklet_struct *t = list;
362
363 list = list->next;
364
365 if (tasklet_trylock(t)) {
366 if (!atomic_read(&t->count)) {
367 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
368 BUG();
369 t->func(t->data);
370 tasklet_unlock(t);
371 continue;
372 }
373 tasklet_unlock(t);
374 }
375
376 local_irq_disable();
377 t->next = __get_cpu_var(tasklet_vec).list;
378 __get_cpu_var(tasklet_vec).list = t;
379 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
380 local_irq_enable();
381 }
382}
383
384static void tasklet_hi_action(struct softirq_action *a)
385{
386 struct tasklet_struct *list;
387
388 local_irq_disable();
389 list = __get_cpu_var(tasklet_hi_vec).list;
390 __get_cpu_var(tasklet_hi_vec).list = NULL;
391 local_irq_enable();
392
393 while (list) {
394 struct tasklet_struct *t = list;
395
396 list = list->next;
397
398 if (tasklet_trylock(t)) {
399 if (!atomic_read(&t->count)) {
400 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
401 BUG();
402 t->func(t->data);
403 tasklet_unlock(t);
404 continue;
405 }
406 tasklet_unlock(t);
407 }
408
409 local_irq_disable();
410 t->next = __get_cpu_var(tasklet_hi_vec).list;
411 __get_cpu_var(tasklet_hi_vec).list = t;
412 __raise_softirq_irqoff(HI_SOFTIRQ);
413 local_irq_enable();
414 }
415}
416
417
418void tasklet_init(struct tasklet_struct *t,
419 void (*func)(unsigned long), unsigned long data)
420{
421 t->next = NULL;
422 t->state = 0;
423 atomic_set(&t->count, 0);
424 t->func = func;
425 t->data = data;
426}
427
428EXPORT_SYMBOL(tasklet_init);
429
430void tasklet_kill(struct tasklet_struct *t)
431{
432 if (in_interrupt())
433 printk("Attempt to kill tasklet from interrupt\n");
434
435 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
436 do
437 yield();
438 while (test_bit(TASKLET_STATE_SCHED, &t->state));
439 }
440 tasklet_unlock_wait(t);
441 clear_bit(TASKLET_STATE_SCHED, &t->state);
442}
443
444EXPORT_SYMBOL(tasklet_kill);
445
446void __init softirq_init(void)
447{
448 open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
449 open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
450}
451
452static int ksoftirqd(void * __bind_cpu)
453{
454 set_user_nice(current, 19);
455 current->flags |= PF_NOFREEZE;
456
457 set_current_state(TASK_INTERRUPTIBLE);
458
459 while (!kthread_should_stop()) {
460 preempt_disable();
461 if (!local_softirq_pending()) {
462 preempt_enable_no_resched();
463 schedule();
464 preempt_disable();
465 }
466
467 __set_current_state(TASK_RUNNING);
468
469 while (local_softirq_pending()) {
470 /* Preempt disable stops cpu going offline.
471 If already offline, we'll be on wrong CPU:
472 don't process */
473 if (cpu_is_offline((long)__bind_cpu))
474 goto wait_to_die;
475 do_softirq();
476 preempt_enable_no_resched();
477 cond_resched();
478 preempt_disable();
479 }
480 preempt_enable();
481 set_current_state(TASK_INTERRUPTIBLE);
482 }
483 __set_current_state(TASK_RUNNING);
484 return 0;
485
486wait_to_die:
487 preempt_enable();
488 /* Wait for kthread_stop */
489 set_current_state(TASK_INTERRUPTIBLE);
490 while (!kthread_should_stop()) {
491 schedule();
492 set_current_state(TASK_INTERRUPTIBLE);
493 }
494 __set_current_state(TASK_RUNNING);
495 return 0;
496}
497
498#ifdef CONFIG_HOTPLUG_CPU
499/*
500 * tasklet_kill_immediate is called to remove a tasklet which can already be
501 * scheduled for execution on @cpu.
502 *
503 * Unlike tasklet_kill, this function removes the tasklet
504 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
505 *
506 * When this function is called, @cpu must be in the CPU_DEAD state.
507 */
508void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
509{
510 struct tasklet_struct **i;
511
512 BUG_ON(cpu_online(cpu));
513 BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
514
515 if (!test_bit(TASKLET_STATE_SCHED, &t->state))
516 return;
517
518 /* CPU is dead, so no lock needed. */
519 for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
520 if (*i == t) {
521 *i = t->next;
522 return;
523 }
524 }
525 BUG();
526}
527
528static void takeover_tasklets(unsigned int cpu)
529{
530 struct tasklet_struct **i;
531
532 /* CPU is dead, so no lock needed. */
533 local_irq_disable();
534
535 /* Find end, append list for that CPU. */
536 for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
537 *i = per_cpu(tasklet_vec, cpu).list;
538 per_cpu(tasklet_vec, cpu).list = NULL;
539 raise_softirq_irqoff(TASKLET_SOFTIRQ);
540
541 for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
542 *i = per_cpu(tasklet_hi_vec, cpu).list;
543 per_cpu(tasklet_hi_vec, cpu).list = NULL;
544 raise_softirq_irqoff(HI_SOFTIRQ);
545
546 local_irq_enable();
547}
548#endif /* CONFIG_HOTPLUG_CPU */
549
Chandra Seetharaman8c78f302006-07-30 03:03:35 -0700550static int __cpuinit cpu_callback(struct notifier_block *nfb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 unsigned long action,
552 void *hcpu)
553{
554 int hotcpu = (unsigned long)hcpu;
555 struct task_struct *p;
556
557 switch (action) {
558 case CPU_UP_PREPARE:
559 BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
560 BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
561 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
562 if (IS_ERR(p)) {
563 printk("ksoftirqd for %i failed\n", hotcpu);
564 return NOTIFY_BAD;
565 }
566 kthread_bind(p, hotcpu);
567 per_cpu(ksoftirqd, hotcpu) = p;
568 break;
569 case CPU_ONLINE:
570 wake_up_process(per_cpu(ksoftirqd, hotcpu));
571 break;
572#ifdef CONFIG_HOTPLUG_CPU
573 case CPU_UP_CANCELED:
Heiko Carstensfc75cdf2006-06-25 05:49:10 -0700574 if (!per_cpu(ksoftirqd, hotcpu))
575 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 /* Unbind so it can run. Fall thru. */
Heiko Carstensa4c4af72005-11-07 00:58:38 -0800577 kthread_bind(per_cpu(ksoftirqd, hotcpu),
578 any_online_cpu(cpu_online_map));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 case CPU_DEAD:
580 p = per_cpu(ksoftirqd, hotcpu);
581 per_cpu(ksoftirqd, hotcpu) = NULL;
582 kthread_stop(p);
583 takeover_tasklets(hotcpu);
584 break;
585#endif /* CONFIG_HOTPLUG_CPU */
586 }
587 return NOTIFY_OK;
588}
589
Chandra Seetharaman8c78f302006-07-30 03:03:35 -0700590static struct notifier_block __cpuinitdata cpu_nfb = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 .notifier_call = cpu_callback
592};
593
594__init int spawn_ksoftirqd(void)
595{
596 void *cpu = (void *)(long)smp_processor_id();
597 cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
598 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
599 register_cpu_notifier(&cpu_nfb);
600 return 0;
601}
Andrew Morton78eef012006-03-22 00:08:16 -0800602
#ifdef CONFIG_SMP
/*
 * Call a function on all processors.
 *
 * @func is passed to smp_call_function() together with @info, @retry
 * and @wait (presumably covering the other CPUs -- confirm against
 * smp_call_function()), and is then called directly on the local CPU
 * with interrupts disabled. Preemption is disabled throughout.
 *
 * Returns the value returned by smp_call_function().
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
	int status;

	preempt_disable();
	status = smp_call_function(func, info, retry, wait);

	/* Local invocation, with interrupts off. */
	local_irq_disable();
	func(info);
	local_irq_enable();

	preempt_enable();
	return status;
}
EXPORT_SYMBOL(on_each_cpu);
#endif