/*
 * mm/pdflush.c - worker threads for writing back filesystem data
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * 09Apr2002	Andrew Morton
 *		Initial version
 * 29Feb2004	kaos@sgi.com
 *		Move worker thread creation to kthread to avoid chewing
 *		up stack space with nested calls to kernel_thread.
 */
12
13#include <linux/sched.h>
14#include <linux/list.h>
15#include <linux/signal.h>
16#include <linux/spinlock.h>
17#include <linux/gfp.h>
18#include <linux/init.h>
19#include <linux/module.h>
Pavel Machekf5264482008-04-21 22:15:06 +000020#include <linux/fs.h> /* Needed by writeback.h */
21#include <linux/writeback.h> /* Prototypes pdflush_operation() */
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/kthread.h>
Paul Jackson28a42b92005-10-30 15:02:32 -080023#include <linux/cpuset.h>
Nigel Cunningham7dfb7102006-12-06 20:34:23 -080024#include <linux/freezer.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025
26
/*
 * Compile-time defaults for the smallest and largest number of pdflush
 * instances.  They seed nr_pdflush_threads_min and nr_pdflush_threads_max.
 */
#define MIN_PDFLUSH_THREADS	2
#define MAX_PDFLUSH_THREADS	8

/* Defined near the bottom of the file; __pdflush() needs it to grow the pool. */
static void start_one_pdflush_thread(void);
34
35
/*
 * The pdflush threads are worker threads for writing back dirty data.
 * Ideally, we'd like one thread per active disk spindle.  But the disk
 * topology is very hard to divine at this level.  Instead, we take
 * care in various places to prevent more than one pdflush thread from
 * performing writeback against a single filesystem.  pdflush threads
 * have the PF_FLUSHER flag set in current->flags to aid in this.
 */
44
/*
 * List of idle pdflush threads, linked through pdflush_work.list.
 * Every access is serialised by pdflush_lock.
 */
static LIST_HEAD(pdflush_list);
static DEFINE_SPINLOCK(pdflush_lock);
50
/*
 * Number of pdflush threads currently accounted for.  Modified only with
 * pdflush_lock held.
 *
 * Userspace can read (but not write) this value through
 * /proc/sys/vm/nr_pdflush_threads.
 */
int nr_pdflush_threads;
59
60/*
Peter W Morrealefafd6882009-04-06 19:00:29 -070061 * The max/min number of pdflush threads. R/W by sysctl at
62 * /proc/sys/vm/nr_pdflush_threads_max/min
63 */
64int nr_pdflush_threads_max __read_mostly = MAX_PDFLUSH_THREADS;
65int nr_pdflush_threads_min __read_mostly = MIN_PDFLUSH_THREADS;
66
67
68/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070069 * The time at which the pdflush thread pool last went empty
70 */
71static unsigned long last_empty_jifs;
72
/*
 * The pdflush thread.
 *
 * Thread pool management algorithm:
 *
 * - The minimum and maximum number of pdflush instances are bound
 *   by nr_pdflush_threads_min and nr_pdflush_threads_max.
 *
 * - If there have been no idle pdflush instances for 1 second, create
 *   a new one.
 *
 * - If the least-recently-went-to-sleep pdflush thread has been asleep
 *   for more than one second, terminate a thread.
 */
87
88/*
89 * A structure for passing work to a pdflush thread. Also for passing
90 * state information between pdflush threads. Protected by pdflush_lock.
91 */
92struct pdflush_work {
93 struct task_struct *who; /* The thread */
94 void (*fn)(unsigned long); /* A callback function */
95 unsigned long arg0; /* An argument to the callback */
96 struct list_head list; /* On pdflush_list, when idle */
97 unsigned long when_i_went_to_sleep;
98};
99
100static int __pdflush(struct pdflush_work *my_work)
101{
Christoph Lameter930d9152006-01-08 01:00:47 -0800102 current->flags |= PF_FLUSHER | PF_SWAPWRITE;
Rafael J. Wysocki83144182007-07-17 04:03:35 -0700103 set_freezable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 my_work->fn = NULL;
105 my_work->who = current;
106 INIT_LIST_HEAD(&my_work->list);
107
108 spin_lock_irq(&pdflush_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109 for ( ; ; ) {
110 struct pdflush_work *pdf;
111
112 set_current_state(TASK_INTERRUPTIBLE);
113 list_move(&my_work->list, &pdflush_list);
114 my_work->when_i_went_to_sleep = jiffies;
115 spin_unlock_irq(&pdflush_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116 schedule();
Andrew Mortond616e09a2006-06-25 05:47:46 -0700117 try_to_freeze();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 spin_lock_irq(&pdflush_lock);
119 if (!list_empty(&my_work->list)) {
Andrew Mortond616e09a2006-06-25 05:47:46 -0700120 /*
121 * Someone woke us up, but without removing our control
122 * structure from the global list. swsusp will do this
123 * in try_to_freeze()->refrigerator(). Handle it.
124 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 my_work->fn = NULL;
126 continue;
127 }
128 if (my_work->fn == NULL) {
Andrew Mortond616e09a2006-06-25 05:47:46 -0700129 printk("pdflush: bogus wakeup\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130 continue;
131 }
132 spin_unlock_irq(&pdflush_lock);
133
134 (*my_work->fn)(my_work->arg0);
135
Peter W Morrealea56ed662009-04-06 19:00:28 -0700136 spin_lock_irq(&pdflush_lock);
137
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 /*
139 * Thread creation: For how long have there been zero
140 * available threads?
Peter W Morrealea56ed662009-04-06 19:00:28 -0700141 *
142 * To throttle creation, we reset last_empty_jifs.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 */
OGAWA Hirofumi2b4bc462008-07-25 01:45:42 -0700144 if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
Peter W Morrealefafd6882009-04-06 19:00:29 -0700145 if (list_empty(&pdflush_list) &&
146 nr_pdflush_threads < nr_pdflush_threads_max) {
147 last_empty_jifs = jiffies;
148 nr_pdflush_threads++;
149 spin_unlock_irq(&pdflush_lock);
150 start_one_pdflush_thread();
151 spin_lock_irq(&pdflush_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152 }
153 }
154
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 my_work->fn = NULL;
156
157 /*
158 * Thread destruction: For how long has the sleepiest
159 * thread slept?
160 */
161 if (list_empty(&pdflush_list))
162 continue;
Peter W Morrealefafd6882009-04-06 19:00:29 -0700163 if (nr_pdflush_threads <= nr_pdflush_threads_min)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 continue;
165 pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
OGAWA Hirofumi2b4bc462008-07-25 01:45:42 -0700166 if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 /* Limit exit rate */
168 pdf->when_i_went_to_sleep = jiffies;
169 break; /* exeunt */
170 }
171 }
172 nr_pdflush_threads--;
173 spin_unlock_irq(&pdflush_lock);
174 return 0;
175}
176
177/*
178 * Of course, my_work wants to be just a local in __pdflush(). It is
179 * separated out in this manner to hopefully prevent the compiler from
180 * performing unfortunate optimisations against the auto variables. Because
181 * these are visible to other tasks and CPUs. (No problem has actually
182 * been observed. This is just paranoia).
183 */
184static int pdflush(void *dummy)
185{
186 struct pdflush_work my_work;
Rusty Russell174596a2009-01-01 10:12:29 +1030187 cpumask_var_t cpus_allowed;
188
189 /*
190 * Since the caller doesn't even check kthread_run() worked, let's not
191 * freak out too much if this fails.
192 */
193 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
194 printk(KERN_WARNING "pdflush failed to allocate cpumask\n");
195 return 0;
196 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
198 /*
199 * pdflush can spend a lot of time doing encryption via dm-crypt. We
200 * don't want to do that at keventd's priority.
201 */
202 set_user_nice(current, 0);
Paul Jackson28a42b92005-10-30 15:02:32 -0800203
204 /*
205 * Some configs put our parent kthread in a limited cpuset,
Rusty Russell1a2142a2009-03-30 22:05:10 -0600206 * which kthread() overrides, forcing cpus_allowed == cpu_all_mask.
Paul Jackson28a42b92005-10-30 15:02:32 -0800207 * Our needs are more modest - cut back to our cpusets cpus_allowed.
208 * This is needed as pdflush's are dynamically created and destroyed.
209 * The boottime pdflush's are easily placed w/o these 2 lines.
210 */
Rusty Russell174596a2009-01-01 10:12:29 +1030211 cpuset_cpus_allowed(current, cpus_allowed);
212 set_cpus_allowed_ptr(current, cpus_allowed);
213 free_cpumask_var(cpus_allowed);
Paul Jackson28a42b92005-10-30 15:02:32 -0800214
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 return __pdflush(&my_work);
216}
217
218/*
219 * Attempt to wake up a pdflush thread, and get it to do some work for you.
220 * Returns zero if it indeed managed to find a worker thread, and passed your
221 * payload to it.
222 */
223int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
224{
225 unsigned long flags;
226 int ret = 0;
227
Eric Sesterhenn125e1872006-06-23 02:06:06 -0700228 BUG_ON(fn == NULL); /* Hard to diagnose if it's deferred */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229
230 spin_lock_irqsave(&pdflush_lock, flags);
231 if (list_empty(&pdflush_list)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232 ret = -1;
233 } else {
234 struct pdflush_work *pdf;
235
236 pdf = list_entry(pdflush_list.next, struct pdflush_work, list);
237 list_del_init(&pdf->list);
238 if (list_empty(&pdflush_list))
239 last_empty_jifs = jiffies;
240 pdf->fn = fn;
241 pdf->arg0 = arg0;
242 wake_up_process(pdf->who);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 }
Denis Cheng5aecd552008-05-12 14:02:14 -0700244 spin_unlock_irqrestore(&pdflush_lock, flags);
245
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 return ret;
247}
248
249static void start_one_pdflush_thread(void)
250{
Peter W Morrealea56ed662009-04-06 19:00:28 -0700251 struct task_struct *k;
252
253 k = kthread_run(pdflush, NULL, "pdflush");
254 if (unlikely(IS_ERR(k))) {
255 spin_lock_irq(&pdflush_lock);
256 nr_pdflush_threads--;
257 spin_unlock_irq(&pdflush_lock);
258 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259}
260
261static int __init pdflush_init(void)
262{
263 int i;
264
Peter W Morrealea56ed662009-04-06 19:00:28 -0700265 /*
266 * Pre-set nr_pdflush_threads... If we fail to create,
267 * the count will be decremented.
268 */
Peter W Morrealefafd6882009-04-06 19:00:29 -0700269 nr_pdflush_threads = nr_pdflush_threads_min;
Peter W Morrealea56ed662009-04-06 19:00:28 -0700270
Peter W Morrealefafd6882009-04-06 19:00:29 -0700271 for (i = 0; i < nr_pdflush_threads_min; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 start_one_pdflush_thread();
273 return 0;
274}
275
276module_init(pdflush_init);