/*
 * mm/pdflush.c - worker threads for writing back filesystem data
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * 09Apr2002	akpm@zip.com.au
 *		Initial version
 * 29Feb2004	kaos@sgi.com
 *		Move worker thread creation to kthread to avoid chewing
 *		up stack space with nested calls to kernel_thread.
 */
12
#include <linux/kernel.h>	// printk log levels, printk_ratelimit()
#include <linux/err.h>		// IS_ERR(), PTR_ERR()
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/signal.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>		// Needed by writeback.h
#include <linux/writeback.h>	// Prototypes pdflush_operation()
#include <linux/kthread.h>
#include <linux/cpuset.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024
25
/*
 * Bounds on the size of the pdflush worker pool.
 *
 * MIN_PDFLUSH_THREADS workers are started by pdflush_init() and a worker
 * never retires while the pool is at or below that size.  The pool is not
 * grown once nr_pdflush_threads has reached MAX_PDFLUSH_THREADS.
 */
#define MIN_PDFLUSH_THREADS	2
#define MAX_PDFLUSH_THREADS	8

/* Defined near the end of the file; needed earlier by __pdflush(). */
static void start_one_pdflush_thread(void);
33
34
/*
 * The pdflush threads are worker threads for writing back dirty data.
 * Ideally, we'd like one thread per active disk spindle.  But the disk
 * topology is very hard to divine at this level.  Instead, we take
 * care in various places to prevent more than one pdflush thread from
 * performing writeback against a single filesystem.  pdflush threads
 * have the PF_FLUSHER flag set in current->flags to aid in this.
 */
43
/*
 * The idle pdflush workers.  A worker links its pdflush_work here just
 * before it goes to sleep and pdflush_operation() unlinks it when handing
 * it a job, so a busy worker is not on this list.  The most recently idled
 * worker sits at the head.  Protected by pdflush_lock.
 */
static LIST_HEAD(pdflush_list);
static DEFINE_SPINLOCK(pdflush_lock);

/*
 * Number of pdflush threads currently alive.  Each worker increments it on
 * entry to __pdflush() and decrements it on the way out, both under
 * pdflush_lock.
 *
 * Readable by sysctl, but not writable.  Published to userspace at
 * /proc/sys/vm/nr_pdflush_threads.
 */
int nr_pdflush_threads = 0;

/*
 * Timestamp (in jiffies) recorded by pdflush_operation() when it hands out
 * the last idle worker, i.e. when the pool last went empty.  __pdflush()
 * compares against it when deciding whether to grow the pool.
 */
static unsigned long last_empty_jifs;
63
/*
 * The pdflush thread.
 *
 * Thread pool management algorithm:
 *
 * - The minimum and maximum number of pdflush instances are bounded
 *   by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS.
 *
 * - If there have been no idle pdflush instances for 1 second, create
 *   a new one.
 *
 * - If the least-recently-went-to-sleep pdflush thread has been asleep
 *   for more than one second, terminate a thread.
 */
78
79/*
80 * A structure for passing work to a pdflush thread. Also for passing
81 * state information between pdflush threads. Protected by pdflush_lock.
82 */
83struct pdflush_work {
84 struct task_struct *who; /* The thread */
85 void (*fn)(unsigned long); /* A callback function */
86 unsigned long arg0; /* An argument to the callback */
87 struct list_head list; /* On pdflush_list, when idle */
88 unsigned long when_i_went_to_sleep;
89};
90
91static int __pdflush(struct pdflush_work *my_work)
92{
Christoph Lameter930d9152006-01-08 01:00:47 -080093 current->flags |= PF_FLUSHER | PF_SWAPWRITE;
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 my_work->fn = NULL;
95 my_work->who = current;
96 INIT_LIST_HEAD(&my_work->list);
97
98 spin_lock_irq(&pdflush_lock);
99 nr_pdflush_threads++;
100 for ( ; ; ) {
101 struct pdflush_work *pdf;
102
103 set_current_state(TASK_INTERRUPTIBLE);
104 list_move(&my_work->list, &pdflush_list);
105 my_work->when_i_went_to_sleep = jiffies;
106 spin_unlock_irq(&pdflush_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 schedule();
Andrew Mortond616e092006-06-25 05:47:46 -0700108 try_to_freeze();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109 spin_lock_irq(&pdflush_lock);
110 if (!list_empty(&my_work->list)) {
Andrew Mortond616e092006-06-25 05:47:46 -0700111 /*
112 * Someone woke us up, but without removing our control
113 * structure from the global list. swsusp will do this
114 * in try_to_freeze()->refrigerator(). Handle it.
115 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116 my_work->fn = NULL;
117 continue;
118 }
119 if (my_work->fn == NULL) {
Andrew Mortond616e092006-06-25 05:47:46 -0700120 printk("pdflush: bogus wakeup\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121 continue;
122 }
123 spin_unlock_irq(&pdflush_lock);
124
125 (*my_work->fn)(my_work->arg0);
126
127 /*
128 * Thread creation: For how long have there been zero
129 * available threads?
130 */
131 if (jiffies - last_empty_jifs > 1 * HZ) {
132 /* unlocked list_empty() test is OK here */
133 if (list_empty(&pdflush_list)) {
134 /* unlocked test is OK here */
135 if (nr_pdflush_threads < MAX_PDFLUSH_THREADS)
136 start_one_pdflush_thread();
137 }
138 }
139
140 spin_lock_irq(&pdflush_lock);
141 my_work->fn = NULL;
142
143 /*
144 * Thread destruction: For how long has the sleepiest
145 * thread slept?
146 */
147 if (list_empty(&pdflush_list))
148 continue;
149 if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
150 continue;
151 pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
152 if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
153 /* Limit exit rate */
154 pdf->when_i_went_to_sleep = jiffies;
155 break; /* exeunt */
156 }
157 }
158 nr_pdflush_threads--;
159 spin_unlock_irq(&pdflush_lock);
160 return 0;
161}
162
163/*
164 * Of course, my_work wants to be just a local in __pdflush(). It is
165 * separated out in this manner to hopefully prevent the compiler from
166 * performing unfortunate optimisations against the auto variables. Because
167 * these are visible to other tasks and CPUs. (No problem has actually
168 * been observed. This is just paranoia).
169 */
170static int pdflush(void *dummy)
171{
172 struct pdflush_work my_work;
Paul Jackson28a42b92005-10-30 15:02:32 -0800173 cpumask_t cpus_allowed;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174
175 /*
176 * pdflush can spend a lot of time doing encryption via dm-crypt. We
177 * don't want to do that at keventd's priority.
178 */
179 set_user_nice(current, 0);
Paul Jackson28a42b92005-10-30 15:02:32 -0800180
181 /*
182 * Some configs put our parent kthread in a limited cpuset,
183 * which kthread() overrides, forcing cpus_allowed == CPU_MASK_ALL.
184 * Our needs are more modest - cut back to our cpusets cpus_allowed.
185 * This is needed as pdflush's are dynamically created and destroyed.
186 * The boottime pdflush's are easily placed w/o these 2 lines.
187 */
188 cpus_allowed = cpuset_cpus_allowed(current);
189 set_cpus_allowed(current, cpus_allowed);
190
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 return __pdflush(&my_work);
192}
193
194/*
195 * Attempt to wake up a pdflush thread, and get it to do some work for you.
196 * Returns zero if it indeed managed to find a worker thread, and passed your
197 * payload to it.
198 */
199int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
200{
201 unsigned long flags;
202 int ret = 0;
203
Eric Sesterhenn125e1872006-06-23 02:06:06 -0700204 BUG_ON(fn == NULL); /* Hard to diagnose if it's deferred */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205
206 spin_lock_irqsave(&pdflush_lock, flags);
207 if (list_empty(&pdflush_list)) {
208 spin_unlock_irqrestore(&pdflush_lock, flags);
209 ret = -1;
210 } else {
211 struct pdflush_work *pdf;
212
213 pdf = list_entry(pdflush_list.next, struct pdflush_work, list);
214 list_del_init(&pdf->list);
215 if (list_empty(&pdflush_list))
216 last_empty_jifs = jiffies;
217 pdf->fn = fn;
218 pdf->arg0 = arg0;
219 wake_up_process(pdf->who);
220 spin_unlock_irqrestore(&pdflush_lock, flags);
221 }
222 return ret;
223}
224
225static void start_one_pdflush_thread(void)
226{
227 kthread_run(pdflush, NULL, "pdflush");
228}
229
230static int __init pdflush_init(void)
231{
232 int i;
233
234 for (i = 0; i < MIN_PDFLUSH_THREADS; i++)
235 start_one_pdflush_thread();
236 return 0;
237}
238
239module_init(pdflush_init);