blob: 3c8403c012ce1b78f7354f40093e5465bf1d060f [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * include/linux/backing-dev.h
3 *
4 * low-level device information and state which is propagated up through
5 * to high-level code.
6 */
7
8#ifndef _LINUX_BACKING_DEV_H
9#define _LINUX_BACKING_DEV_H
10
Peter Zijlstracf0ca9f2008-04-30 00:54:32 -070011#include <linux/kernel.h>
Miklos Szeredie4ad08f2008-04-30 00:54:37 -070012#include <linux/fs.h>
Jens Axboe03ba3782009-09-09 09:08:54 +020013#include <linux/sched.h>
Tejun Heoa212b102015-05-22 17:13:33 -040014#include <linux/blkdev.h>
Jens Axboe03ba3782009-09-09 09:08:54 +020015#include <linux/writeback.h>
Tejun Heo52ebea72015-05-22 17:13:37 -040016#include <linux/blk-cgroup.h>
Tejun Heo66114ca2015-05-22 17:13:32 -040017#include <linux/backing-dev-defs.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018
Mikulas Patocka8077c0d2013-10-14 12:14:13 -040019int __must_check bdi_init(struct backing_dev_info *bdi);
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070020void bdi_destroy(struct backing_dev_info *bdi);
21
Joe Perchesd2cc4dd2012-11-29 08:37:03 -060022__printf(3, 4)
Peter Zijlstracf0ca9f2008-04-30 00:54:32 -070023int bdi_register(struct backing_dev_info *bdi, struct device *parent,
24 const char *fmt, ...);
25int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
26void bdi_unregister(struct backing_dev_info *bdi);
Christoph Hellwigb4caecd2015-01-14 10:42:32 +010027int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
Curt Wohlgemuth0e175a12011-10-07 21:54:10 -060028void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
29 enum wb_reason reason);
Christoph Hellwigc5444192010-06-08 18:15:15 +020030void bdi_start_background_writeback(struct backing_dev_info *bdi);
Tejun Heof0054bb2015-05-22 17:13:30 -040031void wb_workfn(struct work_struct *work);
Tejun Heod6c10f12015-05-22 17:13:45 -040032bool bdi_has_dirty_io(struct backing_dev_info *bdi);
Tejun Heof0054bb2015-05-22 17:13:30 -040033void wb_wakeup_delayed(struct bdi_writeback *wb);
Peter Zijlstracf0ca9f2008-04-30 00:54:32 -070034
Jens Axboe03ba3782009-09-09 09:08:54 +020035extern spinlock_t bdi_lock;
Jens Axboe66f3b8e2009-09-02 09:19:46 +020036extern struct list_head bdi_list;
37
Tejun Heo839a8e82013-04-01 19:08:06 -070038extern struct workqueue_struct *bdi_wq;
39
Tejun Heod6c10f12015-05-22 17:13:45 -040040static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
Jens Axboe03ba3782009-09-09 09:08:54 +020041{
Tejun Heod6c10f12015-05-22 17:13:45 -040042 return test_bit(WB_has_dirty_io, &wb->state);
Jens Axboe03ba3782009-09-09 09:08:54 +020043}
44
Tejun Heo93f78d82015-05-22 17:13:27 -040045static inline void __add_wb_stat(struct bdi_writeback *wb,
46 enum wb_stat_item item, s64 amount)
Peter Zijlstrae0bf68d2007-10-16 23:25:46 -070047{
Tejun Heo93f78d82015-05-22 17:13:27 -040048 __percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH);
Peter Zijlstrae0bf68d2007-10-16 23:25:46 -070049}
50
Tejun Heo93f78d82015-05-22 17:13:27 -040051static inline void __inc_wb_stat(struct bdi_writeback *wb,
52 enum wb_stat_item item)
Peter Zijlstrae0bf68d2007-10-16 23:25:46 -070053{
Tejun Heo93f78d82015-05-22 17:13:27 -040054 __add_wb_stat(wb, item, 1);
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070055}
56
Tejun Heo93f78d82015-05-22 17:13:27 -040057static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070058{
59 unsigned long flags;
60
61 local_irq_save(flags);
Tejun Heo93f78d82015-05-22 17:13:27 -040062 __inc_wb_stat(wb, item);
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070063 local_irq_restore(flags);
64}
65
Tejun Heo93f78d82015-05-22 17:13:27 -040066static inline void __dec_wb_stat(struct bdi_writeback *wb,
67 enum wb_stat_item item)
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070068{
Tejun Heo93f78d82015-05-22 17:13:27 -040069 __add_wb_stat(wb, item, -1);
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070070}
71
Tejun Heo93f78d82015-05-22 17:13:27 -040072static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070073{
74 unsigned long flags;
75
76 local_irq_save(flags);
Tejun Heo93f78d82015-05-22 17:13:27 -040077 __dec_wb_stat(wb, item);
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070078 local_irq_restore(flags);
79}
80
Tejun Heo93f78d82015-05-22 17:13:27 -040081static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070082{
Tejun Heo93f78d82015-05-22 17:13:27 -040083 return percpu_counter_read_positive(&wb->stat[item]);
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070084}
85
Tejun Heo93f78d82015-05-22 17:13:27 -040086static inline s64 __wb_stat_sum(struct bdi_writeback *wb,
87 enum wb_stat_item item)
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070088{
Tejun Heo93f78d82015-05-22 17:13:27 -040089 return percpu_counter_sum_positive(&wb->stat[item]);
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070090}
91
Tejun Heo93f78d82015-05-22 17:13:27 -040092static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070093{
94 s64 sum;
95 unsigned long flags;
96
97 local_irq_save(flags);
Tejun Heo93f78d82015-05-22 17:13:27 -040098 sum = __wb_stat_sum(wb, item);
Peter Zijlstrab2e8fb62007-10-16 23:25:47 -070099 local_irq_restore(flags);
100
101 return sum;
102}
103
Tejun Heo93f78d82015-05-22 17:13:27 -0400104extern void wb_writeout_inc(struct bdi_writeback *wb);
Miklos Szeredidd5656e2008-04-30 00:54:37 -0700105
/*
 * Maximal error of a stat counter: nr_cpu_ids * WB_STAT_BATCH when
 * CONFIG_SMP is set, 1 otherwise.  @wb is not consulted.
 */
static inline unsigned long wb_stat_error(struct bdi_writeback *wb)
{
	unsigned long max_error = 1;

#ifdef CONFIG_SMP
	max_error = nr_cpu_ids * WB_STAT_BATCH;
#endif
	return max_error;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117
Peter Zijlstra189d3c42008-04-30 00:54:35 -0700118int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
Peter Zijlstraa42dde02008-04-30 00:54:36 -0700119int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
Peter Zijlstra189d3c42008-04-30 00:54:35 -0700120
/*
 * Flags in backing_dev_info::capabilities
 *
 * The NO_ACCT_DIRTY, NO_WRITEBACK and NO_ACCT_WB flags control whether
 * dirty pages will contribute to the VM's accounting and whether
 * writepages() should be called for dirty pages (something that would
 * not, for example, be appropriate for ramfs).
 *
 * WARNING: these three flags are closely related and should not normally
 * be used separately.  BDI_CAP_NO_ACCT_AND_WRITEBACK combines them into a
 * single convenience macro.
 *
 * BDI_CAP_NO_ACCT_DIRTY:    Dirty pages shouldn't contribute to accounting
 * BDI_CAP_NO_WRITEBACK:     Don't write pages back
 * BDI_CAP_NO_ACCT_WB:       Don't automatically account writeback pages
 * BDI_CAP_STABLE_WRITES:    Stable pages are required; tested by
 *                           bdi_cap_stable_pages_required()
 * BDI_CAP_STRICTLIMIT:      Keep number of dirty pages below bdi threshold.
 *
 * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback.
 */
#define BDI_CAP_NO_ACCT_DIRTY		0x00000001
#define BDI_CAP_NO_WRITEBACK		0x00000002
#define BDI_CAP_NO_ACCT_WB		0x00000004
#define BDI_CAP_STABLE_WRITES		0x00000008
#define BDI_CAP_STRICTLIMIT		0x00000010
#define BDI_CAP_CGROUP_WRITEBACK	0x00000020

#define BDI_CAP_NO_ACCT_AND_WRITEBACK \
	(BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB)
148
Jörn Engel5129a462010-04-25 08:54:42 +0200149extern struct backing_dev_info noop_backing_dev_info;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151int writeback_in_progress(struct backing_dev_info *bdi);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152
Tejun Heoa212b102015-05-22 17:13:33 -0400153static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
154{
155 struct super_block *sb;
156
157 if (!inode)
158 return &noop_backing_dev_info;
159
160 sb = inode->i_sb;
161#ifdef CONFIG_BLOCK
162 if (sb_is_blkdev_sb(sb))
163 return blk_get_backing_dev_info(I_BDEV(inode));
164#endif
165 return sb->s_bdi;
166}
167
Tejun Heoec8a6f22015-05-22 17:13:41 -0400168static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169{
Tejun Heoec8a6f22015-05-22 17:13:41 -0400170 struct backing_dev_info *bdi = wb->bdi;
171
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 if (bdi->congested_fn)
Tejun Heoec8a6f22015-05-22 17:13:41 -0400173 return bdi->congested_fn(bdi->congested_data, cong_bits);
174 return wb->congested->state & cong_bits;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175}
176
Jens Axboe8aa7e842009-07-09 14:52:32 +0200177long congestion_wait(int sync, long timeout);
Mel Gorman0e093d992010-10-26 14:21:45 -0700178long wait_iff_congested(struct zone *zone, int sync, long timeout);
Wanpeng Li3965c9a2012-07-31 16:41:52 -0700179int pdflush_proc_obsolete(struct ctl_table *table, int write,
180 void __user *buffer, size_t *lenp, loff_t *ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
Darrick J. Wong7d311cd2013-02-21 16:42:48 -0800182static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
183{
184 return bdi->capabilities & BDI_CAP_STABLE_WRITES;
185}
186
Miklos Szeredie4ad08f2008-04-30 00:54:37 -0700187static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
188{
189 return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK);
190}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191
Miklos Szeredie4ad08f2008-04-30 00:54:37 -0700192static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi)
193{
194 return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY);
195}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196
Miklos Szeredie4ad08f2008-04-30 00:54:37 -0700197static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
198{
199 /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */
200 return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB |
201 BDI_CAP_NO_WRITEBACK));
202}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203
Miklos Szeredie4ad08f2008-04-30 00:54:37 -0700204static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
205{
Christoph Hellwigde1414a2015-01-14 10:42:36 +0100206 return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host));
Miklos Szeredie4ad08f2008-04-30 00:54:37 -0700207}
208
209static inline bool mapping_cap_account_dirty(struct address_space *mapping)
210{
Christoph Hellwigde1414a2015-01-14 10:42:36 +0100211 return bdi_cap_account_dirty(inode_to_bdi(mapping->host));
Miklos Szeredie4ad08f2008-04-30 00:54:37 -0700212}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213
/*
 * Call schedule() once and return 0.  @word is accepted but not used.
 */
static inline int bdi_sched_wait(void *word)
{
	schedule();

	return 0;
}
219
Tejun Heo89e9b9e2015-05-22 17:13:36 -0400220#ifdef CONFIG_CGROUP_WRITEBACK
221
Tejun Heo52ebea72015-05-22 17:13:37 -0400222struct bdi_writeback_congested *
223wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
224void wb_congested_put(struct bdi_writeback_congested *congested);
225struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
226 struct cgroup_subsys_state *memcg_css,
227 gfp_t gfp);
228void __inode_attach_wb(struct inode *inode, struct page *page);
229void wb_memcg_offline(struct mem_cgroup *memcg);
230void wb_blkcg_offline(struct blkcg *blkcg);
Tejun Heo703c2702015-05-22 17:13:44 -0400231int inode_congested(struct inode *inode, int cong_bits);
Tejun Heo52ebea72015-05-22 17:13:37 -0400232
Tejun Heo89e9b9e2015-05-22 17:13:36 -0400233/**
234 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
235 * @inode: inode of interest
236 *
237 * cgroup writeback requires support from both the bdi and filesystem.
238 * Test whether @inode has both.
239 */
240static inline bool inode_cgwb_enabled(struct inode *inode)
241{
242 struct backing_dev_info *bdi = inode_to_bdi(inode);
243
244 return bdi_cap_account_dirty(bdi) &&
245 (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
246 (inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK);
247}
248
Tejun Heo52ebea72015-05-22 17:13:37 -0400249/**
250 * wb_tryget - try to increment a wb's refcount
251 * @wb: bdi_writeback to get
252 */
253static inline bool wb_tryget(struct bdi_writeback *wb)
254{
255 if (wb != &wb->bdi->wb)
256 return percpu_ref_tryget(&wb->refcnt);
257 return true;
258}
259
260/**
261 * wb_get - increment a wb's refcount
262 * @wb: bdi_writeback to get
263 */
264static inline void wb_get(struct bdi_writeback *wb)
265{
266 if (wb != &wb->bdi->wb)
267 percpu_ref_get(&wb->refcnt);
268}
269
270/**
271 * wb_put - decrement a wb's refcount
272 * @wb: bdi_writeback to put
273 */
274static inline void wb_put(struct bdi_writeback *wb)
275{
276 if (wb != &wb->bdi->wb)
277 percpu_ref_put(&wb->refcnt);
278}
279
280/**
281 * wb_find_current - find wb for %current on a bdi
282 * @bdi: bdi of interest
283 *
284 * Find the wb of @bdi which matches both the memcg and blkcg of %current.
285 * Must be called under rcu_read_lock() which protects the returend wb.
286 * NULL if not found.
287 */
288static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
289{
290 struct cgroup_subsys_state *memcg_css;
291 struct bdi_writeback *wb;
292
293 memcg_css = task_css(current, memory_cgrp_id);
294 if (!memcg_css->parent)
295 return &bdi->wb;
296
297 wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
298
299 /*
300 * %current's blkcg equals the effective blkcg of its memcg. No
301 * need to use the relatively expensive cgroup_get_e_css().
302 */
303 if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id)))
304 return wb;
305 return NULL;
306}
307
308/**
309 * wb_get_create_current - get or create wb for %current on a bdi
310 * @bdi: bdi of interest
311 * @gfp: allocation mask
312 *
313 * Equivalent to wb_get_create() on %current's memcg. This function is
314 * called from a relatively hot path and optimizes the common cases using
315 * wb_find_current().
316 */
317static inline struct bdi_writeback *
318wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
319{
320 struct bdi_writeback *wb;
321
322 rcu_read_lock();
323 wb = wb_find_current(bdi);
324 if (wb && unlikely(!wb_tryget(wb)))
325 wb = NULL;
326 rcu_read_unlock();
327
328 if (unlikely(!wb)) {
329 struct cgroup_subsys_state *memcg_css;
330
331 memcg_css = task_get_css(current, memory_cgrp_id);
332 wb = wb_get_create(bdi, memcg_css, gfp);
333 css_put(memcg_css);
334 }
335 return wb;
336}
337
338/**
339 * inode_attach_wb - associate an inode with its wb
340 * @inode: inode of interest
341 * @page: page being dirtied (may be NULL)
342 *
343 * If @inode doesn't have its wb, associate it with the wb matching the
344 * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o
345 * @inode->i_lock.
346 */
347static inline void inode_attach_wb(struct inode *inode, struct page *page)
348{
349 if (!inode->i_wb)
350 __inode_attach_wb(inode, page);
351}
352
353/**
354 * inode_detach_wb - disassociate an inode from its wb
355 * @inode: inode of interest
356 *
357 * @inode is being freed. Detach from its wb.
358 */
359static inline void inode_detach_wb(struct inode *inode)
360{
361 if (inode->i_wb) {
362 wb_put(inode->i_wb);
363 inode->i_wb = NULL;
364 }
365}
366
367/**
368 * inode_to_wb - determine the wb of an inode
369 * @inode: inode of interest
370 *
371 * Returns the wb @inode is currently associated with.
372 */
373static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
374{
375 return inode->i_wb;
376}
377
Tejun Heo89e9b9e2015-05-22 17:13:36 -0400378#else /* CONFIG_CGROUP_WRITEBACK */
379
/* !CONFIG_CGROUP_WRITEBACK: cgroup writeback is never enabled. */
static inline bool inode_cgwb_enabled(struct inode *inode)
{
	return false;
}
384
Tejun Heo52ebea72015-05-22 17:13:37 -0400385static inline struct bdi_writeback_congested *
386wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
387{
388 return bdi->wb.congested;
389}
390
/* !CONFIG_CGROUP_WRITEBACK: nothing to release. */
static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
	/* intentionally empty */
}
394
/* !CONFIG_CGROUP_WRITEBACK: no refcount is taken; always reports success. */
static inline bool wb_tryget(struct bdi_writeback *wb)
{
	return true;
}
399
/* !CONFIG_CGROUP_WRITEBACK: no refcount to take. */
static inline void wb_get(struct bdi_writeback *wb)
{
	/* intentionally empty */
}
403
/* !CONFIG_CGROUP_WRITEBACK: no refcount to drop. */
static inline void wb_put(struct bdi_writeback *wb)
{
	/* intentionally empty */
}
407
408static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
409{
410 return &bdi->wb;
411}
412
413static inline struct bdi_writeback *
414wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
415{
416 return &bdi->wb;
417}
418
/* !CONFIG_CGROUP_WRITEBACK: inodes carry no wb association to set up. */
static inline void inode_attach_wb(struct inode *inode, struct page *page)
{
	/* intentionally empty */
}
422
/* !CONFIG_CGROUP_WRITEBACK: inodes carry no wb association to tear down. */
static inline void inode_detach_wb(struct inode *inode)
{
	/* intentionally empty */
}
426
427static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
428{
429 return &inode_to_bdi(inode)->wb;
430}
431
/* !CONFIG_CGROUP_WRITEBACK: nothing to do when a memcg goes offline. */
static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
	/* intentionally empty */
}
435
/* !CONFIG_CGROUP_WRITEBACK: nothing to do when a blkcg goes offline. */
static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
	/* intentionally empty */
}
439
Tejun Heo703c2702015-05-22 17:13:44 -0400440static inline int inode_congested(struct inode *inode, int cong_bits)
441{
442 return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
443}
444
Tejun Heo89e9b9e2015-05-22 17:13:36 -0400445#endif /* CONFIG_CGROUP_WRITEBACK */
446
Tejun Heo703c2702015-05-22 17:13:44 -0400447static inline int inode_read_congested(struct inode *inode)
448{
449 return inode_congested(inode, 1 << WB_sync_congested);
450}
451
452static inline int inode_write_congested(struct inode *inode)
453{
454 return inode_congested(inode, 1 << WB_async_congested);
455}
456
457static inline int inode_rw_congested(struct inode *inode)
458{
459 return inode_congested(inode, (1 << WB_sync_congested) |
460 (1 << WB_async_congested));
461}
462
Tejun Heoec8a6f22015-05-22 17:13:41 -0400463static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
464{
465 return wb_congested(&bdi->wb, cong_bits);
466}
467
468static inline int bdi_read_congested(struct backing_dev_info *bdi)
469{
470 return bdi_congested(bdi, 1 << WB_sync_congested);
471}
472
473static inline int bdi_write_congested(struct backing_dev_info *bdi)
474{
475 return bdi_congested(bdi, 1 << WB_async_congested);
476}
477
478static inline int bdi_rw_congested(struct backing_dev_info *bdi)
479{
480 return bdi_congested(bdi, (1 << WB_sync_congested) |
481 (1 << WB_async_congested));
482}
483
Tejun Heo89e9b9e2015-05-22 17:13:36 -0400484#endif /* _LINUX_BACKING_DEV_H */