/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>

#include <asm/irq.h>
/*
   - No shared variables; all data are CPU-local.
   - If a softirq needs serialization, let it serialize itself
     with its own spinlocks (see the sketch below).
   - Even if a softirq is serialized, only the local CPU is marked
     for execution, so we get a weak sort of CPU binding. It is
     still not clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */
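
/*
 * Illustration (not part of this file): per the rules above, a softirq
 * handler that touches state shared across CPUs serializes itself with
 * its own lock; the core only guarantees that the same softirq never
 * nests on one CPU. The names my_lock, my_shared_state and
 * my_serialized_action() are hypothetical.
 *
 *	static DEFINE_SPINLOCK(my_lock);
 *
 *	static void my_serialized_action(struct softirq_action *a)
 *	{
 *		spin_lock(&my_lock);	// handler provides its own serialization
 *		// ... update my_shared_state ...
 *		spin_unlock(&my_lock);
 *	}
 */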

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so we let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __get_cpu_var(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	add_preempt_count(SOFTIRQ_OFFSET);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
	add_preempt_count(SOFTIRQ_OFFSET);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */

void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);

void __local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());

	/*
	 * softirqs should never be enabled by __local_bh_enable(),
	 * it always nests inside local_bh_enable() sections:
	 */
	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);

	sub_preempt_count(SOFTIRQ_OFFSET);
}
EXPORT_SYMBOL_GPL(__local_bh_enable);

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

void local_bh_enable(void)
{
#ifdef CONFIG_TRACE_IRQFLAGS
	unsigned long flags;

	WARN_ON_ONCE(in_irq());
#endif
	WARN_ON_ONCE(irqs_disabled());

#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_save(flags);
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_restore(flags);
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);
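
/*
 * Usage sketch (illustrative only, not part of this file): process-context
 * code that shares data with a softirq handler on the local CPU can use
 * local_bh_disable()/local_bh_enable() as a bottom-half critical section.
 * my_counter is a hypothetical name.
 *
 *	local_bh_disable();	// softirqs cannot run on this CPU now
 *	my_counter++;		// data also touched from softirq context
 *	local_bh_enable();	// runs any softirqs that became pending
 */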

void local_bh_enable_ip(unsigned long ip)
{
#ifdef CONFIG_TRACE_IRQFLAGS
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	local_irq_save(flags);
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_restore(flags);
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable_ip);

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0));
	trace_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		if (pending & 1) {
			h->action(h);
			rcu_bh_qsctr_inc(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	trace_softirq_exit();

	account_system_vtime(current);
	_local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	account_system_vtime(current);
	add_preempt_count(HARDIRQ_OFFSET);
	trace_hardirq_enter();
}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();
	preempt_enable_no_resched();
}
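
/*
 * Usage sketch (illustrative only, not part of this file): architecture
 * interrupt entry code brackets the actual hardirq work with irq_enter()
 * and irq_exit(), so pending softirqs are run on the way out of the
 * hardirq. my_arch_irq_entry() and handle_my_irq() are hypothetical;
 * the real dispatch function differs per architecture.
 *
 *	void my_arch_irq_entry(unsigned int irq)
 *	{
 *		irq_enter();
 *		handle_my_irq(irq);	// arch/driver-specific hardirq work
 *		irq_exit();		// may invoke softirq processing here
 *	}
 */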

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

void fastcall raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
	softirq_vec[nr].data = data;
	softirq_vec[nr].action = action;
}
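
/*
 * Usage sketch (illustrative only, not part of this file): a subsystem
 * that owns a softirq number registers its handler once at init time with
 * open_softirq() and later marks it pending with raise_softirq().
 * MY_SOFTIRQ and my_softirq_action() are hypothetical names; real users
 * take one of the fixed softirq numbers (HI_SOFTIRQ, TASKLET_SOFTIRQ,
 * NET_TX_SOFTIRQ, ...).
 *
 *	static void my_softirq_action(struct softirq_action *a)
 *	{
 *		// runs with hardirqs enabled and softirqs disabled,
 *		// on the CPU that raised it
 *	}
 *
 *	open_softirq(MY_SOFTIRQ, my_softirq_action, NULL);	// at init time
 *	raise_softirq(MY_SOFTIRQ);	// marks it pending on this CPU
 */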

/* Tasklets */
struct tasklet_head
{
	struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = __get_cpu_var(tasklet_vec).list;
	__get_cpu_var(tasklet_vec).list = t;
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = __get_cpu_var(tasklet_hi_vec).list;
	__get_cpu_var(tasklet_hi_vec).list = t;
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_vec).list;
	__get_cpu_var(tasklet_vec).list = NULL;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = __get_cpu_var(tasklet_vec).list;
		__get_cpu_var(tasklet_vec).list = t;
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).list;
	__get_cpu_var(tasklet_hi_vec).list = NULL;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = __get_cpu_var(tasklet_hi_vec).list;
		__get_cpu_var(tasklet_hi_vec).list = t;
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}


void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do
			yield();
		while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
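
/*
 * Usage sketch (illustrative only, not part of this file): a driver
 * typically initializes a tasklet once, schedules it from its hardirq
 * handler, and kills it on teardown. my_tasklet_fn(), struct my_dev and
 * its members are hypothetical.
 *
 *	static void my_tasklet_fn(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *)data;
 *		// deferred work: runs in softirq context and never
 *		// concurrently with itself
 *	}
 *
 *	tasklet_init(&dev->tasklet, my_tasklet_fn, (unsigned long)dev);
 *	tasklet_schedule(&dev->tasklet);	// e.g. from the irq handler
 *	tasklet_kill(&dev->tasklet);		// on shutdown, process context
 */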

void __init softirq_init(void)
{
	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

static int ksoftirqd(void * __bind_cpu)
{
	set_user_nice(current, 19);
	current->flags |= PF_NOFREEZE;

	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending()) {
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			do_softirq();
			preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			return;
		}
	}
	BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
	struct tasklet_struct **i;

	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
	*i = per_cpu(tasklet_vec, cpu).list;
	per_cpu(tasklet_vec, cpu).list = NULL;
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
	*i = per_cpu(tasklet_hi_vec, cpu).list;
	per_cpu(tasklet_hi_vec, cpu).list = NULL;
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */
577
Chandra Seetharaman8c78f302006-07-30 03:03:35 -0700578static int __cpuinit cpu_callback(struct notifier_block *nfb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 unsigned long action,
580 void *hcpu)
581{
582 int hotcpu = (unsigned long)hcpu;
583 struct task_struct *p;
584
585 switch (action) {
586 case CPU_UP_PREPARE:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
588 if (IS_ERR(p)) {
589 printk("ksoftirqd for %i failed\n", hotcpu);
590 return NOTIFY_BAD;
591 }
592 kthread_bind(p, hotcpu);
593 per_cpu(ksoftirqd, hotcpu) = p;
594 break;
595 case CPU_ONLINE:
596 wake_up_process(per_cpu(ksoftirqd, hotcpu));
597 break;
598#ifdef CONFIG_HOTPLUG_CPU
599 case CPU_UP_CANCELED:
Heiko Carstensfc75cdf2006-06-25 05:49:10 -0700600 if (!per_cpu(ksoftirqd, hotcpu))
601 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 /* Unbind so it can run. Fall thru. */
Heiko Carstensa4c4af72005-11-07 00:58:38 -0800603 kthread_bind(per_cpu(ksoftirqd, hotcpu),
604 any_online_cpu(cpu_online_map));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605 case CPU_DEAD:
606 p = per_cpu(ksoftirqd, hotcpu);
607 per_cpu(ksoftirqd, hotcpu) = NULL;
608 kthread_stop(p);
609 takeover_tasklets(hotcpu);
610 break;
611#endif /* CONFIG_HOTPLUG_CPU */
612 }
613 return NOTIFY_OK;
614}

static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

__init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	BUG_ON(err == NOTIFY_BAD);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}

#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, retry, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
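
/*
 * Usage sketch (illustrative only, not part of this file): on_each_cpu()
 * runs the callback on every online CPU - on the local CPU directly with
 * interrupts disabled, and on the others via smp_call_function().
 * flush_my_percpu_cache() is a hypothetical callback.
 *
 *	static void flush_my_percpu_cache(void *unused)
 *	{
 *		// runs once on each CPU; must not sleep
 *	}
 *
 *	on_each_cpu(flush_my_percpu_cache, NULL, 0, 1);	// 1 = wait for all CPUs
 */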