/*
 * Copyright (C) 2014 Facebook. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
| 18 | |
| 19 | #ifndef __BTRFS_QGROUP__ |
| 20 | #define __BTRFS_QGROUP__ |
| 21 | |
Qu Wenruo | 3368d00 | 2015-04-16 14:34:17 +0800 | [diff] [blame] | 22 | #include "ulist.h" |
| 23 | #include "delayed-ref.h" |
| 24 | |
/*
 * Btrfs qgroup overview
 *
 * Btrfs qgroup splits into 3 main parts:
 * 1) Reserve
 *    Reserve metadata/data space for incoming operations.
 *    Affects how the qgroup limit works.
 *
 * 2) Trace
 *    Tell btrfs qgroup to trace dirty extents.
 *
 *    Dirty extents include:
 *    - Newly allocated extents
 *    - Extents going to be deleted (in this trans)
 *    - Extents whose owner is going to be modified
 *
 *    This is the main part that affects whether qgroup numbers will stay
 *    consistent.
 *    Btrfs qgroup can trace clean extents without causing any problem,
 *    but doing so consumes extra CPU time, so it should be avoided if
 *    possible.
 *
 * 3) Account
 *    Btrfs qgroup will update its numbers, based on the dirty extents
 *    traced in the previous step.
 *
 *    Normally done at qgroup rescan and transaction commit time.
 */
| 52 | |
| 53 | /* |
Qu Wenruo | 3368d00 | 2015-04-16 14:34:17 +0800 | [diff] [blame] | 54 | * Record a dirty extent, and info qgroup to update quota on it |
| 55 | * TODO: Use kmem cache to alloc it. |
| 56 | */ |
| 57 | struct btrfs_qgroup_extent_record { |
| 58 | struct rb_node node; |
| 59 | u64 bytenr; |
| 60 | u64 num_bytes; |
| 61 | struct ulist *old_roots; |
| 62 | }; |
| 63 | |
/*
 * Kinds of qgroup reservation.
 *
 * The enumerators are used to size struct btrfs_qgroup_rsv::values, so
 * BTRFS_QGROUP_RSV_LAST is a count rather than a real type and must stay
 * the final enumerator.
 */
enum btrfs_qgroup_rsv_type {
	BTRFS_QGROUP_RSV_DATA = 0,
	BTRFS_QGROUP_RSV_META,
	BTRFS_QGROUP_RSV_LAST,
};
| 69 | |
| 70 | /* |
| 71 | * Represents how many bytes we have reserved for this qgroup. |
| 72 | * |
| 73 | * Each type should have different reservation behavior. |
| 74 | * E.g, data follows its io_tree flag modification, while |
| 75 | * *currently* meta is just reserve-and-clear during transcation. |
| 76 | * |
| 77 | * TODO: Add new type for reservation which can survive transaction commit. |
| 78 | * Currect metadata reservation behavior is not suitable for such case. |
| 79 | */ |
| 80 | struct btrfs_qgroup_rsv { |
| 81 | u64 values[BTRFS_QGROUP_RSV_LAST]; |
| 82 | }; |
| 83 | |
Qu Wenruo | 81fb6f7 | 2015-09-28 16:57:53 +0800 | [diff] [blame] | 84 | /* |
Qu Wenruo | 3159fe7 | 2017-03-13 15:52:08 +0800 | [diff] [blame] | 85 | * one struct for each qgroup, organized in fs_info->qgroup_tree. |
| 86 | */ |
| 87 | struct btrfs_qgroup { |
| 88 | u64 qgroupid; |
| 89 | |
| 90 | /* |
| 91 | * state |
| 92 | */ |
| 93 | u64 rfer; /* referenced */ |
| 94 | u64 rfer_cmpr; /* referenced compressed */ |
| 95 | u64 excl; /* exclusive */ |
| 96 | u64 excl_cmpr; /* exclusive compressed */ |
| 97 | |
| 98 | /* |
| 99 | * limits |
| 100 | */ |
| 101 | u64 lim_flags; /* which limits are set */ |
| 102 | u64 max_rfer; |
| 103 | u64 max_excl; |
| 104 | u64 rsv_rfer; |
| 105 | u64 rsv_excl; |
| 106 | |
| 107 | /* |
| 108 | * reservation tracking |
| 109 | */ |
| 110 | u64 reserved; |
Qu Wenruo | d4e5c92 | 2017-12-12 15:34:23 +0800 | [diff] [blame^] | 111 | struct btrfs_qgroup_rsv rsv; |
Qu Wenruo | 3159fe7 | 2017-03-13 15:52:08 +0800 | [diff] [blame] | 112 | |
| 113 | /* |
| 114 | * lists |
| 115 | */ |
| 116 | struct list_head groups; /* groups this group is member of */ |
| 117 | struct list_head members; /* groups that are members of this group */ |
| 118 | struct list_head dirty; /* dirty groups */ |
| 119 | struct rb_node node; /* tree of qgroups */ |
| 120 | |
| 121 | /* |
| 122 | * temp variables for accounting operations |
| 123 | * Refer to qgroup_shared_accounting() for details. |
| 124 | */ |
| 125 | u64 old_refcnt; |
| 126 | u64 new_refcnt; |
| 127 | }; |
| 128 | |
/*
 * For qgroup event trace points only.
 *
 * Each value is a distinct single bit.
 */
#define QGROUP_RESERVE		(1 << 0)
#define QGROUP_RELEASE		(1 << 1)
#define QGROUP_FREE		(1 << 2)
| 135 | |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 136 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, |
| 137 | struct btrfs_fs_info *fs_info); |
| 138 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, |
| 139 | struct btrfs_fs_info *fs_info); |
| 140 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); |
| 141 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); |
Jeff Mahoney | d06f23d | 2016-08-08 22:08:06 -0400 | [diff] [blame] | 142 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, |
| 143 | bool interruptible); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 144 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
| 145 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
| 146 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, |
| 147 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
| 148 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, |
Dongsheng Yang | 4087cf2 | 2015-01-18 10:59:23 -0500 | [diff] [blame] | 149 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 150 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, |
| 151 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
| 152 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, |
| 153 | struct btrfs_fs_info *fs_info, u64 qgroupid, |
| 154 | struct btrfs_qgroup_limit *limit); |
| 155 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); |
| 156 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); |
| 157 | struct btrfs_delayed_extent_op; |
Qu Wenruo | d1b8b94 | 2017-02-27 15:10:35 +0800 | [diff] [blame] | 158 | |
/*
 * Inform qgroup to trace one dirty extent, its info is recorded in @record.
 * So qgroup can account it at transaction committing time.
 *
 * No lock version: the caller must hold the delayed ref lock and must have
 * allocated the memory for @record, then call
 * btrfs_qgroup_trace_extent_post() after exiting lock context.
 *
 * Return 0 if @record was inserted successfully.
 * Return >0 for existing record, caller can free @record safely.
 * Error is not possible.
 */
int btrfs_qgroup_trace_extent_nolock(
		struct btrfs_fs_info *fs_info,
		struct btrfs_delayed_ref_root *delayed_refs,
		struct btrfs_qgroup_extent_record *record);
| 174 | |
/*
 * Post handler after btrfs_qgroup_trace_extent_nolock().
 *
 * NOTE: Current qgroup does the expensive backref walk at transaction
 * committing time with TRANS_STATE_COMMIT_DOING, which blocks incoming
 * new transactions.
 * This is designed to allow btrfs_find_all_roots() to get the correct
 * new_roots result.
 *
 * However for old_roots there is no need to do the backref walk at that
 * time, since we search commit roots to walk backrefs and the result will
 * always be correct.
 *
 * Due to the nature of the no lock version, we can't do the backref walk
 * there.
 * So we must call btrfs_qgroup_trace_extent_post() after exiting
 * spinlock context.
 *
 * TODO: If we can fix and prove btrfs_find_all_roots() can get the correct
 * result using the current root, then we can move all expensive backref
 * walks out of transaction committing, but not now as qgroup accounting
 * will be wrong again.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord);
| 198 | |
| 199 | /* |
Qu Wenruo | 50b3e04 | 2016-10-18 09:31:27 +0800 | [diff] [blame] | 200 | * Inform qgroup to trace one dirty extent, specified by @bytenr and |
| 201 | * @num_bytes. |
| 202 | * So qgroup can account it at commit trans time. |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 203 | * |
Qu Wenruo | fb235dc | 2017-02-15 10:43:03 +0800 | [diff] [blame] | 204 | * Better encapsulated version, with memory allocation and backref walk for |
| 205 | * commit roots. |
| 206 | * So this can sleep. |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 207 | * |
| 208 | * Return 0 if the operation is done. |
| 209 | * Return <0 for error, like memory allocation failure or invalid parameter |
| 210 | * (NULL trans) |
| 211 | */ |
Qu Wenruo | 50b3e04 | 2016-10-18 09:31:27 +0800 | [diff] [blame] | 212 | int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 213 | struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, |
| 214 | gfp_t gfp_flag); |
| 215 | |
/*
 * Inform qgroup to trace all leaf items of data
 *
 * Return 0 for success
 * Return <0 for error (ENOMEM)
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct extent_buffer *eb);
| 225 | /* |
| 226 | * Inform qgroup to trace a whole subtree, including all its child tree |
| 227 | * blocks and data. |
| 228 | * The root tree block is specified by @root_eb. |
| 229 | * |
| 230 | * Normally used by relocation(tree block swap) and subvolume deletion. |
| 231 | * |
| 232 | * Return 0 for success |
| 233 | * Return <0 for error(ENOMEM or tree search error) |
| 234 | */ |
| 235 | int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, |
| 236 | struct btrfs_root *root, |
| 237 | struct extent_buffer *root_eb, |
| 238 | u64 root_gen, int root_level); |
Qu Wenruo | 442244c | 2015-04-16 17:18:36 +0800 | [diff] [blame] | 239 | int |
| 240 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, |
| 241 | struct btrfs_fs_info *fs_info, |
| 242 | u64 bytenr, u64 num_bytes, |
| 243 | struct ulist *old_roots, struct ulist *new_roots); |
Nikolay Borisov | 460fb20 | 2018-03-15 16:00:25 +0200 | [diff] [blame] | 244 | int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 245 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, |
| 246 | struct btrfs_fs_info *fs_info); |
| 247 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, |
| 248 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, |
| 249 | struct btrfs_qgroup_inherit *inherit); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 250 | void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, |
Qu Wenruo | d4e5c92 | 2017-12-12 15:34:23 +0800 | [diff] [blame^] | 251 | u64 ref_root, u64 num_bytes, |
| 252 | enum btrfs_qgroup_rsv_type type); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 253 | static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info, |
| 254 | u64 ref_root, u64 num_bytes) |
| 255 | { |
Jeff Mahoney | bc07452 | 2016-06-09 17:27:55 -0400 | [diff] [blame] | 256 | trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes); |
Qu Wenruo | d4e5c92 | 2017-12-12 15:34:23 +0800 | [diff] [blame^] | 257 | btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes, |
| 258 | BTRFS_QGROUP_RSV_DATA); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 259 | } |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 260 | |
/* Only declared when the btrfs sanity tests are configured in. */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl);
#endif
| 265 | |
Qu Wenruo | 5247255 | 2015-10-12 16:05:40 +0800 | [diff] [blame] | 266 | /* New io_tree based accurate qgroup reserve API */ |
Qu Wenruo | 364ecf3 | 2017-02-27 15:10:38 +0800 | [diff] [blame] | 267 | int btrfs_qgroup_reserve_data(struct inode *inode, |
| 268 | struct extent_changeset **reserved, u64 start, u64 len); |
Qu Wenruo | f695fdc | 2015-10-12 16:28:06 +0800 | [diff] [blame] | 269 | int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len); |
Qu Wenruo | bc42bda | 2017-02-27 15:10:39 +0800 | [diff] [blame] | 270 | int btrfs_qgroup_free_data(struct inode *inode, |
| 271 | struct extent_changeset *reserved, u64 start, u64 len); |
Qu Wenruo | 55eeaf0 | 2015-09-08 17:08:38 +0800 | [diff] [blame] | 272 | |
Jeff Mahoney | 003d7c5 | 2017-01-25 09:50:33 -0500 | [diff] [blame] | 273 | int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
| 274 | bool enforce); |
Qu Wenruo | 55eeaf0 | 2015-09-08 17:08:38 +0800 | [diff] [blame] | 275 | void btrfs_qgroup_free_meta_all(struct btrfs_root *root); |
| 276 | void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes); |
Qu Wenruo | 56fa9d0 | 2015-10-13 09:53:10 +0800 | [diff] [blame] | 277 | void btrfs_qgroup_check_reserved_leak(struct inode *inode); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 278 | #endif /* __BTRFS_QGROUP__ */ |