Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2014 Facebook. All rights reserved. |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public |
| 6 | * License v2 as published by the Free Software Foundation. |
| 7 | * |
| 8 | * This program is distributed in the hope that it will be useful, |
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 11 | * General Public License for more details. |
| 12 | * |
| 13 | * You should have received a copy of the GNU General Public |
| 14 | * License along with this program; if not, write to the |
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
 | 16 | * Boston, MA 02111-1307, USA. |
| 17 | */ |
| 18 | |
| 19 | #ifndef __BTRFS_QGROUP__ |
| 20 | #define __BTRFS_QGROUP__ |
| 21 | |
Qu Wenruo | 3368d00 | 2015-04-16 14:34:17 +0800 | [diff] [blame] | 22 | #include "ulist.h" |
| 23 | #include "delayed-ref.h" |
| 24 | |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 25 | /* |
Qu Wenruo | 1d2beaa | 2016-10-18 09:31:26 +0800 | [diff] [blame] | 26 | * Btrfs qgroup overview |
| 27 | * |
 | 28 | * Btrfs qgroup is split into 3 main parts: |
| 29 | * 1) Reserve |
| 30 | * Reserve metadata/data space for incoming operations |
| 31 | * Affect how qgroup limit works |
| 32 | * |
| 33 | * 2) Trace |
| 34 | * Tell btrfs qgroup to trace dirty extents. |
| 35 | * |
 | 36 | * Dirty extents include: |
| 37 | * - Newly allocated extents |
| 38 | * - Extents going to be deleted (in this trans) |
| 39 | * - Extents whose owner is going to be modified |
| 40 | * |
 | 41 | * This is the main part that affects whether qgroup numbers will stay |
| 42 | * consistent. |
| 43 | * Btrfs qgroup can trace clean extents and won't cause any problem, |
| 44 | * but it will consume extra CPU time, it should be avoided if possible. |
| 45 | * |
| 46 | * 3) Account |
 | 47 | * Btrfs qgroup will update its numbers, based on dirty extents traced |
| 48 | * in previous step. |
| 49 | * |
| 50 | * Normally at qgroup rescan and transaction commit time. |
| 51 | */ |
| 52 | |
| 53 | /* |
Qu Wenruo | 3368d00 | 2015-04-16 14:34:17 +0800 | [diff] [blame] | 54 | * Record a dirty extent, and info qgroup to update quota on it |
| 55 | * TODO: Use kmem cache to alloc it. |
| 56 | */ |
| 57 | struct btrfs_qgroup_extent_record { |
| 58 | struct rb_node node; |
| 59 | u64 bytenr; |
| 60 | u64 num_bytes; |
| 61 | struct ulist *old_roots; |
| 62 | }; |
| 63 | |
/*
 * Qgroup reservation types.
 */
enum btrfs_qgroup_rsv_type {
	/* DATA: space reserved for data. */
	BTRFS_QGROUP_RSV_DATA = 0,

	/*
	 * META_PERTRANS: space reserved for metadata (per-transaction).
	 *
	 * Due to the fact that qgroup data is only updated at transaction
	 * commit time, reserved space for metadata must be kept until the
	 * transaction commits.
	 * Any metadata reserved that is used in btrfs_start_transaction()
	 * should be of this type.
	 */
	BTRFS_QGROUP_RSV_META_PERTRANS,

	/*
	 * META_PREALLOC: there are cases where metadata space is reserved
	 * before starting a transaction, and then btrfs_join_transaction()
	 * is used to get a trans handle.
	 * Any metadata reserved for such usage should be of this type.
	 * After join_transaction(), part (or all) of such a reservation
	 * should be converted into META_PERTRANS.
	 */
	BTRFS_QGROUP_RSV_META_PREALLOC,

	/* Not a reservation type: number of types above, must stay last. */
	BTRFS_QGROUP_RSV_LAST,
};
| 91 | |
| 92 | /* |
| 93 | * Represents how many bytes we have reserved for this qgroup. |
| 94 | * |
| 95 | * Each type should have different reservation behavior. |
| 96 | * E.g, data follows its io_tree flag modification, while |
| 97 | * *currently* meta is just reserve-and-clear during transcation. |
| 98 | * |
| 99 | * TODO: Add new type for reservation which can survive transaction commit. |
| 100 | * Currect metadata reservation behavior is not suitable for such case. |
| 101 | */ |
| 102 | struct btrfs_qgroup_rsv { |
| 103 | u64 values[BTRFS_QGROUP_RSV_LAST]; |
| 104 | }; |
| 105 | |
Qu Wenruo | 81fb6f7 | 2015-09-28 16:57:53 +0800 | [diff] [blame] | 106 | /* |
Qu Wenruo | 3159fe7 | 2017-03-13 15:52:08 +0800 | [diff] [blame] | 107 | * one struct for each qgroup, organized in fs_info->qgroup_tree. |
| 108 | */ |
| 109 | struct btrfs_qgroup { |
| 110 | u64 qgroupid; |
| 111 | |
| 112 | /* |
| 113 | * state |
| 114 | */ |
| 115 | u64 rfer; /* referenced */ |
| 116 | u64 rfer_cmpr; /* referenced compressed */ |
| 117 | u64 excl; /* exclusive */ |
| 118 | u64 excl_cmpr; /* exclusive compressed */ |
| 119 | |
| 120 | /* |
| 121 | * limits |
| 122 | */ |
| 123 | u64 lim_flags; /* which limits are set */ |
| 124 | u64 max_rfer; |
| 125 | u64 max_excl; |
| 126 | u64 rsv_rfer; |
| 127 | u64 rsv_excl; |
| 128 | |
| 129 | /* |
| 130 | * reservation tracking |
| 131 | */ |
Qu Wenruo | d4e5c92 | 2017-12-12 15:34:23 +0800 | [diff] [blame] | 132 | struct btrfs_qgroup_rsv rsv; |
Qu Wenruo | 3159fe7 | 2017-03-13 15:52:08 +0800 | [diff] [blame] | 133 | |
| 134 | /* |
| 135 | * lists |
| 136 | */ |
| 137 | struct list_head groups; /* groups this group is member of */ |
| 138 | struct list_head members; /* groups that are members of this group */ |
| 139 | struct list_head dirty; /* dirty groups */ |
| 140 | struct rb_node node; /* tree of qgroups */ |
| 141 | |
| 142 | /* |
| 143 | * temp variables for accounting operations |
| 144 | * Refer to qgroup_shared_accounting() for details. |
| 145 | */ |
| 146 | u64 old_refcnt; |
| 147 | u64 new_refcnt; |
| 148 | }; |
| 149 | |
/*
 * Event bits, one bit per event.
 * For qgroup event trace points only.
 */
#define QGROUP_RESERVE		(1 << 0)
#define QGROUP_RELEASE		(1 << 1)
#define QGROUP_FREE		(1 << 2)
| 156 | |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 157 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, |
| 158 | struct btrfs_fs_info *fs_info); |
| 159 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, |
| 160 | struct btrfs_fs_info *fs_info); |
| 161 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); |
| 162 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); |
Jeff Mahoney | d06f23d | 2016-08-08 22:08:06 -0400 | [diff] [blame] | 163 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, |
| 164 | bool interruptible); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 165 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
| 166 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
| 167 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, |
| 168 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
| 169 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, |
Dongsheng Yang | 4087cf2 | 2015-01-18 10:59:23 -0500 | [diff] [blame] | 170 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 171 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, |
| 172 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
| 173 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, |
| 174 | struct btrfs_fs_info *fs_info, u64 qgroupid, |
| 175 | struct btrfs_qgroup_limit *limit); |
| 176 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); |
| 177 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); |
| 178 | struct btrfs_delayed_extent_op; |
Qu Wenruo | d1b8b94 | 2017-02-27 15:10:35 +0800 | [diff] [blame] | 179 | |
/*
 * Inform qgroup to trace one dirty extent whose info is recorded in
 * @record, so that qgroup can account it at transaction committing time.
 *
 * This is the no-lock version: the caller must acquire the delayed ref lock
 * and allocate the memory itself, and must then call
 * btrfs_qgroup_trace_extent_post() after exiting the lock context.
 *
 * Return 0 for a successful insert.
 * Return >0 for an existing record; the caller can free @record safely.
 * Error is not possible.
 */
int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
		struct btrfs_delayed_ref_root *delayed_refs,
		struct btrfs_qgroup_extent_record *record);
| 195 | |
/*
 * Post handler to be run after btrfs_qgroup_trace_extent_nolock().
 *
 * NOTE: Currently qgroup does the expensive backref walk at transaction
 * committing time with TRANS_STATE_COMMIT_DOING, which blocks incoming new
 * transactions.
 * This is designed to allow btrfs_find_all_roots() to get a correct
 * new_roots result.
 *
 * For old_roots, however, there is no need to do the backref walk at that
 * time: commit roots are searched for the backref walk, so the result will
 * always be correct.
 *
 * Due to the nature of the no-lock version, the backref walk cannot be done
 * there, so btrfs_qgroup_trace_extent_post() must be called after exiting
 * the spinlock context.
 *
 * TODO: If we can fix and prove btrfs_find_all_roots() can get a correct
 * result using the current root, then all expensive backref walks can be
 * moved out of transaction committing, but not now as qgroup accounting
 * would be wrong again.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
		struct btrfs_qgroup_extent_record *qrecord);
| 219 | |
| 220 | /* |
Qu Wenruo | 50b3e04 | 2016-10-18 09:31:27 +0800 | [diff] [blame] | 221 | * Inform qgroup to trace one dirty extent, specified by @bytenr and |
| 222 | * @num_bytes. |
| 223 | * So qgroup can account it at commit trans time. |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 224 | * |
Qu Wenruo | fb235dc | 2017-02-15 10:43:03 +0800 | [diff] [blame] | 225 | * Better encapsulated version, with memory allocation and backref walk for |
| 226 | * commit roots. |
| 227 | * So this can sleep. |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 228 | * |
| 229 | * Return 0 if the operation is done. |
| 230 | * Return <0 for error, like memory allocation failure or invalid parameter |
| 231 | * (NULL trans) |
| 232 | */ |
Qu Wenruo | 50b3e04 | 2016-10-18 09:31:27 +0800 | [diff] [blame] | 233 | int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 234 | struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, |
| 235 | gfp_t gfp_flag); |
| 236 | |
/*
 * Inform qgroup to trace all leaf items of data.
 *
 * Return 0 for success.
 * Return <0 for error (ENOMEM).
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct extent_buffer *eb);
| 246 | /* |
| 247 | * Inform qgroup to trace a whole subtree, including all its child tree |
| 248 | * blocks and data. |
| 249 | * The root tree block is specified by @root_eb. |
| 250 | * |
| 251 | * Normally used by relocation(tree block swap) and subvolume deletion. |
| 252 | * |
| 253 | * Return 0 for success |
| 254 | * Return <0 for error(ENOMEM or tree search error) |
| 255 | */ |
| 256 | int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, |
| 257 | struct btrfs_root *root, |
| 258 | struct extent_buffer *root_eb, |
| 259 | u64 root_gen, int root_level); |
Qu Wenruo | 442244c | 2015-04-16 17:18:36 +0800 | [diff] [blame] | 260 | int |
| 261 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, |
| 262 | struct btrfs_fs_info *fs_info, |
| 263 | u64 bytenr, u64 num_bytes, |
| 264 | struct ulist *old_roots, struct ulist *new_roots); |
Nikolay Borisov | 460fb20 | 2018-03-15 16:00:25 +0200 | [diff] [blame] | 265 | int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 266 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, |
| 267 | struct btrfs_fs_info *fs_info); |
| 268 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, |
| 269 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, |
| 270 | struct btrfs_qgroup_inherit *inherit); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 271 | void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, |
Qu Wenruo | d4e5c92 | 2017-12-12 15:34:23 +0800 | [diff] [blame] | 272 | u64 ref_root, u64 num_bytes, |
| 273 | enum btrfs_qgroup_rsv_type type); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 274 | static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info, |
| 275 | u64 ref_root, u64 num_bytes) |
| 276 | { |
Jeff Mahoney | bc07452 | 2016-06-09 17:27:55 -0400 | [diff] [blame] | 277 | trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes); |
Qu Wenruo | d4e5c92 | 2017-12-12 15:34:23 +0800 | [diff] [blame] | 278 | btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes, |
| 279 | BTRFS_QGROUP_RSV_DATA); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 280 | } |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 281 | |
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Declared only when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is defined. */
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info,
			       u64 qgroupid, u64 rfer, u64 excl);
#endif
| 286 | |
Qu Wenruo | 5247255 | 2015-10-12 16:05:40 +0800 | [diff] [blame] | 287 | /* New io_tree based accurate qgroup reserve API */ |
Qu Wenruo | 364ecf3 | 2017-02-27 15:10:38 +0800 | [diff] [blame] | 288 | int btrfs_qgroup_reserve_data(struct inode *inode, |
| 289 | struct extent_changeset **reserved, u64 start, u64 len); |
Qu Wenruo | f695fdc | 2015-10-12 16:28:06 +0800 | [diff] [blame] | 290 | int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len); |
Qu Wenruo | bc42bda | 2017-02-27 15:10:39 +0800 | [diff] [blame] | 291 | int btrfs_qgroup_free_data(struct inode *inode, |
| 292 | struct extent_changeset *reserved, u64 start, u64 len); |
Qu Wenruo | 55eeaf0 | 2015-09-08 17:08:38 +0800 | [diff] [blame] | 293 | |
/*
 * Common metadata reservation helper taking an explicit reservation @type.
 * Callers in this header go through the typed inline wrappers
 * (btrfs_qgroup_reserve_meta_pertrans() / _prealloc()).
 */
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
				enum btrfs_qgroup_rsv_type type,
				bool enforce);
| 296 | /* Reserve metadata space for pertrans and prealloc type */ |
| 297 | static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root, |
| 298 | int num_bytes, bool enforce) |
| 299 | { |
| 300 | return __btrfs_qgroup_reserve_meta(root, num_bytes, |
| 301 | BTRFS_QGROUP_RSV_META_PERTRANS, enforce); |
| 302 | } |
| 303 | static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root, |
| 304 | int num_bytes, bool enforce) |
| 305 | { |
| 306 | return __btrfs_qgroup_reserve_meta(root, num_bytes, |
| 307 | BTRFS_QGROUP_RSV_META_PREALLOC, enforce); |
| 308 | } |
| 309 | |
/*
 * Common metadata free helper taking an explicit reservation @type.
 * Callers in this header go through the typed inline wrappers
 * (btrfs_qgroup_free_meta_pertrans() / _prealloc()).
 */
void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
			      enum btrfs_qgroup_rsv_type type);
| 312 | |
| 313 | /* Free per-transaction meta reservation for error handling */ |
| 314 | static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root, |
| 315 | int num_bytes) |
| 316 | { |
| 317 | __btrfs_qgroup_free_meta(root, num_bytes, |
| 318 | BTRFS_QGROUP_RSV_META_PERTRANS); |
| 319 | } |
| 320 | |
| 321 | /* Pre-allocated meta reservation can be freed at need */ |
| 322 | static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root, |
| 323 | int num_bytes) |
| 324 | { |
| 325 | __btrfs_qgroup_free_meta(root, num_bytes, |
| 326 | BTRFS_QGROUP_RSV_META_PREALLOC); |
| 327 | } |
| 328 | |
/*
 * All per-transaction meta reservation of @root should be freed at
 * transaction commit time; this is the call for doing so.
 */
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
| 334 | |
/*
 * Check @inode for leaked qgroup reserved space.
 * NOTE(review): summary taken from the function name only; the
 * implementation is not visible in this header - confirm.
 */
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 336 | #endif /* __BTRFS_QGROUP__ */ |