/*
 * include/linux/writeback.h
 */
| 4 | #ifndef WRITEBACK_H |
| 5 | #define WRITEBACK_H |
| 6 | |
Alexey Dobriyan | e8edc6e | 2007-05-21 01:22:52 +0400 | [diff] [blame] | 7 | #include <linux/sched.h> |
Kent Overstreet | a27bb33 | 2013-05-07 16:19:08 -0700 | [diff] [blame] | 8 | #include <linux/workqueue.h> |
Jens Axboe | f5ff842 | 2007-09-21 09:19:54 +0200 | [diff] [blame] | 9 | #include <linux/fs.h> |
Tejun Heo | 380c27c | 2015-05-22 18:23:21 -0400 | [diff] [blame] | 10 | #include <linux/flex_proportions.h> |
Alexey Dobriyan | e8edc6e | 2007-05-21 01:22:52 +0400 | [diff] [blame] | 11 | |
Wu Fengguang | 54848d7 | 2011-04-05 13:21:19 -0600 | [diff] [blame] | 12 | DECLARE_PER_CPU(int, dirty_throttle_leaks); |
| 13 | |
/*
 * The 1/4 region under the global dirty thresh is for smooth dirty throttling:
 *
 *	(thresh - thresh/DIRTY_FULL_SCOPE, thresh)
 *
 * Further beyond, all dirtier tasks will enter a loop waiting (possibly for a
 * long time) for the dirty pages to drop, unless they have written enough
 * pages.
 *
 * The global dirty threshold is normally equal to the global dirty limit,
 * except when the system suddenly allocates a lot of anonymous memory and
 * knocks down the global dirty threshold quickly, in which case the global
 * dirty limit will follow down slowly to prevent livelocking all dirtier tasks.
 */
#define DIRTY_SCOPE		8
#define DIRTY_FULL_SCOPE	(DIRTY_SCOPE / 2)
Wu Fengguang | ffd1f60 | 2011-06-19 22:18:42 -0600 | [diff] [blame] | 29 | |
/* Opaque in this header: it is only ever referred to through a pointer. */
struct backing_dev_info;
| 31 | |
/*
 * fs/fs-writeback.c
 */

/* Waiting policy for a writeback pass; stored in writeback_control.sync_mode. */
enum writeback_sync_modes {
	WB_SYNC_NONE = 0,	/* Don't wait on anything */
	WB_SYNC_ALL,		/* Wait on every mapping */
};
| 39 | |
/*
 * Why some writeback work was initiated.
 *
 * The enumerators rely on implicit numbering starting at 0, so their order
 * determines their values.
 */
enum wb_reason {
	WB_REASON_BACKGROUND,
	WB_REASON_TRY_TO_FREE_PAGES,
	WB_REASON_SYNC,
	WB_REASON_PERIODIC,
	WB_REASON_LAPTOP_TIMER,
	WB_REASON_FREE_MORE_MEM,
	WB_REASON_FS_FREE_SPACE,
	/*
	 * There is no bdi forker thread any more; that work is now done by an
	 * emergency worker.  However, this value is visible to userland
	 * through TPs (tracepoints) and we keep exposing exactly the same
	 * information, so the name no longer matches what it describes.
	 */
	WB_REASON_FORKER_THREAD,

	/* One past the last valid reason, i.e. the number of reasons above. */
	WB_REASON_MAX,
};
Curt Wohlgemuth | 0e175a1 | 2011-10-07 21:54:10 -0600 | [diff] [blame] | 61 | |
| 62 | /* |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 63 | * A control structure which tells the writeback code what to do. These are |
| 64 | * always on the stack, and hence need no locking. They are always initialised |
| 65 | * in a manner such that unspecified fields are set to zero. |
| 66 | */ |
| 67 | struct writeback_control { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 68 | long nr_to_write; /* Write this many pages, and decrement |
| 69 | this for each page written */ |
| 70 | long pages_skipped; /* Pages which were not written */ |
| 71 | |
| 72 | /* |
Andrew Morton | 95468fd | 2012-03-05 15:06:02 -0800 | [diff] [blame] | 73 | * For a_ops->writepages(): if start or end are non-zero then this is |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 74 | * a hint that the filesystem need only write out the pages inside that |
| 75 | * byterange. The byte at `end' is included in the writeout request. |
| 76 | */ |
OGAWA Hirofumi | 111ebb6 | 2006-06-23 02:03:26 -0700 | [diff] [blame] | 77 | loff_t range_start; |
| 78 | loff_t range_end; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 79 | |
Richard Kennedy | 4cd9069 | 2012-04-25 14:53:05 +0100 | [diff] [blame] | 80 | enum writeback_sync_modes sync_mode; |
| 81 | |
Andrew Morton | 22905f7 | 2005-11-16 15:07:01 -0800 | [diff] [blame] | 82 | unsigned for_kupdate:1; /* A kupdate writeback */ |
Wu Fengguang | b17621f | 2009-12-03 13:54:25 +0100 | [diff] [blame] | 83 | unsigned for_background:1; /* A background writeback */ |
Wu Fengguang | 6e6938b | 2010-06-06 10:38:15 -0600 | [diff] [blame] | 84 | unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ |
Andrew Morton | 22905f7 | 2005-11-16 15:07:01 -0800 | [diff] [blame] | 85 | unsigned for_reclaim:1; /* Invoked from the page allocator */ |
OGAWA Hirofumi | 111ebb6 | 2006-06-23 02:03:26 -0700 | [diff] [blame] | 86 | unsigned range_cyclic:1; /* range_start is cyclic */ |
Dave Chinner | 7747bd4 | 2013-07-02 22:38:35 +1000 | [diff] [blame] | 87 | unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 88 | }; |
| 89 | |
| 90 | /* |
Tejun Heo | 380c27c | 2015-05-22 18:23:21 -0400 | [diff] [blame] | 91 | * A wb_domain represents a domain that wb's (bdi_writeback's) belong to |
| 92 | * and are measured against each other in. There always is one global |
| 93 | * domain, global_wb_domain, that every wb in the system is a member of. |
| 94 | * This allows measuring the relative bandwidth of each wb to distribute |
| 95 | * dirtyable memory accordingly. |
| 96 | */ |
| 97 | struct wb_domain { |
Tejun Heo | dcc25ae | 2015-05-22 18:23:22 -0400 | [diff] [blame] | 98 | spinlock_t lock; |
| 99 | |
Tejun Heo | 380c27c | 2015-05-22 18:23:21 -0400 | [diff] [blame] | 100 | /* |
| 101 | * Scale the writeback cache size proportional to the relative |
| 102 | * writeout speed. |
| 103 | * |
| 104 | * We do this by keeping a floating proportion between BDIs, based |
| 105 | * on page writeback completions [end_page_writeback()]. Those |
| 106 | * devices that write out pages fastest will get the larger share, |
| 107 | * while the slower will get a smaller share. |
| 108 | * |
| 109 | * We use page writeout completions because we are interested in |
| 110 | * getting rid of dirty pages. Having them written out is the |
| 111 | * primary goal. |
| 112 | * |
| 113 | * We introduce a concept of time, a period over which we measure |
| 114 | * these events, because demand can/will vary over time. The length |
| 115 | * of this period itself is measured in page writeback completions. |
| 116 | */ |
| 117 | struct fprop_global completions; |
| 118 | struct timer_list period_timer; /* timer for aging of completions */ |
| 119 | unsigned long period_time; |
Tejun Heo | dcc25ae | 2015-05-22 18:23:22 -0400 | [diff] [blame] | 120 | |
| 121 | /* |
| 122 | * The dirtyable memory and dirty threshold could be suddenly |
| 123 | * knocked down by a large amount (eg. on the startup of KVM in a |
| 124 | * swapless system). This may throw the system into deep dirty |
| 125 | * exceeded state and throttle heavy/light dirtiers alike. To |
| 126 | * retain good responsiveness, maintain global_dirty_limit for |
| 127 | * tracking slowly down to the knocked down dirty threshold. |
| 128 | * |
| 129 | * Both fields are protected by ->lock. |
| 130 | */ |
| 131 | unsigned long dirty_limit_tstamp; |
| 132 | unsigned long dirty_limit; |
Tejun Heo | 380c27c | 2015-05-22 18:23:21 -0400 | [diff] [blame] | 133 | }; |
| 134 | |
Tejun Heo | 2529bb3 | 2015-05-22 18:23:34 -0400 | [diff] [blame] | 135 | /** |
| 136 | * wb_domain_size_changed - memory available to a wb_domain has changed |
| 137 | * @dom: wb_domain of interest |
| 138 | * |
| 139 | * This function should be called when the amount of memory available to |
| 140 | * @dom has changed. It resets @dom's dirty limit parameters to prevent |
| 141 | * the past values which don't match the current configuration from skewing |
| 142 | * dirty throttling. Without this, when memory size of a wb_domain is |
| 143 | * greatly reduced, the dirty throttling logic may allow too many pages to |
| 144 | * be dirtied leading to consecutive unnecessary OOMs and may get stuck in |
| 145 | * that situation. |
| 146 | */ |
| 147 | static inline void wb_domain_size_changed(struct wb_domain *dom) |
| 148 | { |
| 149 | spin_lock(&dom->lock); |
| 150 | dom->dirty_limit_tstamp = jiffies; |
| 151 | dom->dirty_limit = 0; |
| 152 | spin_unlock(&dom->lock); |
| 153 | } |
| 154 | |
/*
 * fs/fs-writeback.c
 */
struct bdi_writeback;	/* opaque here; only used through pointers */

void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason);
void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
			    enum wb_reason reason);
bool try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason);
bool try_to_writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
				   enum wb_reason reason);
void sync_inodes_sb(struct super_block *sb);
void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
void inode_wait_for_writeback(struct inode *inode);
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 168 | |
| 169 | /* writeback.h requires fs.h; it, too, is not included from here. */ |
| 170 | static inline void wait_on_inode(struct inode *inode) |
| 171 | { |
| 172 | might_sleep(); |
NeilBrown | 7431620 | 2014-07-07 15:16:04 +1000 | [diff] [blame] | 173 | wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 174 | } |
Joern Engel | 1c0eeaf | 2007-10-16 23:30:44 -0700 | [diff] [blame] | 175 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 176 | /* |
| 177 | * mm/page-writeback.c |
| 178 | */ |
Jens Axboe | c2c4986 | 2010-05-20 09:18:47 +0200 | [diff] [blame] | 179 | #ifdef CONFIG_BLOCK |
Matthew Garrett | 31373d0 | 2010-04-06 14:25:14 +0200 | [diff] [blame] | 180 | void laptop_io_completion(struct backing_dev_info *info); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 181 | void laptop_sync_completion(void); |
Matthew Garrett | 31373d0 | 2010-04-06 14:25:14 +0200 | [diff] [blame] | 182 | void laptop_mode_sync(struct work_struct *work); |
| 183 | void laptop_mode_timer_fn(unsigned long data); |
Jens Axboe | c2c4986 | 2010-05-20 09:18:47 +0200 | [diff] [blame] | 184 | #else |
| 185 | static inline void laptop_sync_completion(void) { } |
| 186 | #endif |
Andrew Morton | 232ea4d | 2007-02-28 20:13:21 -0800 | [diff] [blame] | 187 | void throttle_vm_writeout(gfp_t gfp_mask); |
Johannes Weiner | a756cf5 | 2012-01-10 15:07:49 -0800 | [diff] [blame] | 188 | bool zone_dirty_ok(struct zone *zone); |
Tejun Heo | 380c27c | 2015-05-22 18:23:21 -0400 | [diff] [blame] | 189 | int wb_domain_init(struct wb_domain *dom, gfp_t gfp); |
Tejun Heo | 841710a | 2015-05-22 18:23:33 -0400 | [diff] [blame] | 190 | #ifdef CONFIG_CGROUP_WRITEBACK |
| 191 | void wb_domain_exit(struct wb_domain *dom); |
| 192 | #endif |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 193 | |
/* The single global domain that every wb in the system is a member of. */
extern struct wb_domain global_wb_domain;

/* These are exported to sysctl. */
extern int dirty_background_ratio;
extern unsigned long dirty_background_bytes;
extern int vm_dirty_ratio;
extern unsigned long vm_dirty_bytes;
extern unsigned int dirty_writeback_interval;
extern unsigned int dirty_expire_interval;
extern unsigned int dirtytime_expire_interval;
extern int vm_highmem_is_dirtyable;
extern int block_dump;
extern int laptop_mode;
| 207 | |
David Rientjes | 2da0299 | 2009-01-06 14:39:31 -0800 | [diff] [blame] | 208 | extern int dirty_background_ratio_handler(struct ctl_table *table, int write, |
Alexey Dobriyan | 8d65af7 | 2009-09-23 15:57:19 -0700 | [diff] [blame] | 209 | void __user *buffer, size_t *lenp, |
David Rientjes | 2da0299 | 2009-01-06 14:39:31 -0800 | [diff] [blame] | 210 | loff_t *ppos); |
| 211 | extern int dirty_background_bytes_handler(struct ctl_table *table, int write, |
Alexey Dobriyan | 8d65af7 | 2009-09-23 15:57:19 -0700 | [diff] [blame] | 212 | void __user *buffer, size_t *lenp, |
David Rientjes | 2da0299 | 2009-01-06 14:39:31 -0800 | [diff] [blame] | 213 | loff_t *ppos); |
Peter Zijlstra | 04fbfdc | 2007-10-16 23:25:50 -0700 | [diff] [blame] | 214 | extern int dirty_ratio_handler(struct ctl_table *table, int write, |
Alexey Dobriyan | 8d65af7 | 2009-09-23 15:57:19 -0700 | [diff] [blame] | 215 | void __user *buffer, size_t *lenp, |
Peter Zijlstra | 04fbfdc | 2007-10-16 23:25:50 -0700 | [diff] [blame] | 216 | loff_t *ppos); |
David Rientjes | 2da0299 | 2009-01-06 14:39:31 -0800 | [diff] [blame] | 217 | extern int dirty_bytes_handler(struct ctl_table *table, int write, |
Alexey Dobriyan | 8d65af7 | 2009-09-23 15:57:19 -0700 | [diff] [blame] | 218 | void __user *buffer, size_t *lenp, |
David Rientjes | 2da0299 | 2009-01-06 14:39:31 -0800 | [diff] [blame] | 219 | loff_t *ppos); |
Theodore Ts'o | 1efff91 | 2015-03-17 12:23:32 -0400 | [diff] [blame] | 220 | int dirtytime_interval_handler(struct ctl_table *table, int write, |
| 221 | void __user *buffer, size_t *lenp, loff_t *ppos); |
Peter Zijlstra | 04fbfdc | 2007-10-16 23:25:50 -0700 | [diff] [blame] | 222 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 223 | struct ctl_table; |
Alexey Dobriyan | 8d65af7 | 2009-09-23 15:57:19 -0700 | [diff] [blame] | 224 | int dirty_writeback_centisecs_handler(struct ctl_table *, int, |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 225 | void __user *, size_t *, loff_t *); |
| 226 | |
/* Dirty threshold queries; results are returned through the pointers. */
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
bool wb_over_bg_thresh(struct bdi_writeback *wb);

/* Writeback bookkeeping, initialisation and the dirty-page throttling hook. */
void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
void page_writeback_init(void);
void balance_dirty_pages_ratelimited(struct address_space *mapping);
Andrew Morton | fa5a734 | 2006-03-24 03:18:10 -0800 | [diff] [blame] | 234 | |
Miklos Szeredi | 0ea9718 | 2007-05-10 22:22:51 -0700 | [diff] [blame] | 235 | typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, |
| 236 | void *data); |
| 237 | |
Miklos Szeredi | 0ea9718 | 2007-05-10 22:22:51 -0700 | [diff] [blame] | 238 | int generic_writepages(struct address_space *mapping, |
| 239 | struct writeback_control *wbc); |
Eric Sandeen | 5b41d92 | 2010-10-27 21:30:13 -0400 | [diff] [blame] | 240 | void tag_pages_for_writeback(struct address_space *mapping, |
| 241 | pgoff_t start, pgoff_t end); |
Miklos Szeredi | 0ea9718 | 2007-05-10 22:22:51 -0700 | [diff] [blame] | 242 | int write_cache_pages(struct address_space *mapping, |
| 243 | struct writeback_control *wbc, writepage_t writepage, |
| 244 | void *data); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 245 | int do_writepages(struct address_space *mapping, struct writeback_control *wbc); |
Chandra Seetharaman | 2d1d43f | 2006-09-29 02:01:25 -0700 | [diff] [blame] | 246 | void writeback_set_ratelimit(void); |
Namhyung Kim | 92c09c0 | 2010-10-26 14:22:03 -0700 | [diff] [blame] | 247 | void tag_pages_for_writeback(struct address_space *mapping, |
| 248 | pgoff_t start, pgoff_t end); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 249 | |
Wu Fengguang | 2f800fb | 2011-08-08 15:22:00 -0600 | [diff] [blame] | 250 | void account_page_redirty(struct page *page); |
| 251 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 252 | #endif /* WRITEBACK_H */ |