/*
 * Copyright (C) 2014 Facebook. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
| 18 | |
| 19 | #ifndef __BTRFS_QGROUP__ |
| 20 | #define __BTRFS_QGROUP__ |
| 21 | |
Qu Wenruo | 3368d00 | 2015-04-16 14:34:17 +0800 | [diff] [blame] | 22 | #include "ulist.h" |
| 23 | #include "delayed-ref.h" |
| 24 | |
/*
 * Btrfs qgroup overview
 *
 * Btrfs qgroup is split into 3 main parts:
 * 1) Reserve
 *    Reserve metadata/data space for incoming operations.
 *    Affects how the qgroup limit works.
 *
 * 2) Trace
 *    Tell btrfs qgroup to trace dirty extents.
 *
 *    Dirty extents include:
 *    - Newly allocated extents
 *    - Extents going to be deleted (in this trans)
 *    - Extents whose owner is going to be modified
 *
 *    This is the main part that affects whether qgroup numbers will stay
 *    consistent.
 *    Btrfs qgroup can trace clean extents without causing any problem,
 *    but doing so consumes extra CPU time, so it should be avoided if
 *    possible.
 *
 * 3) Account
 *    Btrfs qgroup will update its numbers, based on the dirty extents
 *    traced in the previous step.
 *
 *    Normally done at qgroup rescan and transaction commit time.
 */
| 52 | |
| 53 | /* |
Qu Wenruo | 3368d00 | 2015-04-16 14:34:17 +0800 | [diff] [blame] | 54 | * Record a dirty extent, and info qgroup to update quota on it |
| 55 | * TODO: Use kmem cache to alloc it. |
| 56 | */ |
| 57 | struct btrfs_qgroup_extent_record { |
| 58 | struct rb_node node; |
| 59 | u64 bytenr; |
| 60 | u64 num_bytes; |
| 61 | struct ulist *old_roots; |
| 62 | }; |
| 63 | |
Qu Wenruo | 81fb6f7 | 2015-09-28 16:57:53 +0800 | [diff] [blame] | 64 | /* |
Qu Wenruo | 3159fe7 | 2017-03-13 15:52:08 +0800 | [diff] [blame] | 65 | * one struct for each qgroup, organized in fs_info->qgroup_tree. |
| 66 | */ |
| 67 | struct btrfs_qgroup { |
| 68 | u64 qgroupid; |
| 69 | |
| 70 | /* |
| 71 | * state |
| 72 | */ |
| 73 | u64 rfer; /* referenced */ |
| 74 | u64 rfer_cmpr; /* referenced compressed */ |
| 75 | u64 excl; /* exclusive */ |
| 76 | u64 excl_cmpr; /* exclusive compressed */ |
| 77 | |
| 78 | /* |
| 79 | * limits |
| 80 | */ |
| 81 | u64 lim_flags; /* which limits are set */ |
| 82 | u64 max_rfer; |
| 83 | u64 max_excl; |
| 84 | u64 rsv_rfer; |
| 85 | u64 rsv_excl; |
| 86 | |
| 87 | /* |
| 88 | * reservation tracking |
| 89 | */ |
| 90 | u64 reserved; |
| 91 | |
| 92 | /* |
| 93 | * lists |
| 94 | */ |
| 95 | struct list_head groups; /* groups this group is member of */ |
| 96 | struct list_head members; /* groups that are members of this group */ |
| 97 | struct list_head dirty; /* dirty groups */ |
| 98 | struct rb_node node; /* tree of qgroups */ |
| 99 | |
| 100 | /* |
| 101 | * temp variables for accounting operations |
| 102 | * Refer to qgroup_shared_accounting() for details. |
| 103 | */ |
| 104 | u64 old_refcnt; |
| 105 | u64 new_refcnt; |
| 106 | }; |
| 107 | |
/*
 * For qgroup event trace points only.
 *
 * Each value is a distinct single bit, so the three can be OR-ed together
 * and told apart again.
 */
#define QGROUP_RESERVE	(1 << 0)
#define QGROUP_RELEASE	(1 << 1)
#define QGROUP_FREE	(1 << 2)
| 114 | |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 115 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, |
| 116 | struct btrfs_fs_info *fs_info); |
| 117 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, |
| 118 | struct btrfs_fs_info *fs_info); |
| 119 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); |
| 120 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); |
Jeff Mahoney | d06f23d | 2016-08-08 22:08:06 -0400 | [diff] [blame] | 121 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, |
| 122 | bool interruptible); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 123 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
| 124 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
| 125 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, |
| 126 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
| 127 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, |
Dongsheng Yang | 4087cf2 | 2015-01-18 10:59:23 -0500 | [diff] [blame] | 128 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 129 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, |
| 130 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
| 131 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, |
| 132 | struct btrfs_fs_info *fs_info, u64 qgroupid, |
| 133 | struct btrfs_qgroup_limit *limit); |
| 134 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); |
| 135 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); |
| 136 | struct btrfs_delayed_extent_op; |
Qu Wenruo | d1b8b94 | 2017-02-27 15:10:35 +0800 | [diff] [blame^] | 137 | |
/*
 * Inform qgroup to trace one dirty extent, its info is recorded in @record.
 * So qgroup can account it at transaction committing time.
 *
 * No lock version: the caller must acquire the delayed ref lock and allocate
 * the memory for @record, then call btrfs_qgroup_trace_extent_post() after
 * exiting lock context.
 *
 * Return 0 for a successful insert.
 * Return >0 for an existing record, caller can free @record safely.
 * Error is not possible.
 */
int btrfs_qgroup_trace_extent_nolock(
		struct btrfs_fs_info *fs_info,
		struct btrfs_delayed_ref_root *delayed_refs,
		struct btrfs_qgroup_extent_record *record);
| 153 | |
/*
 * Post handler after btrfs_qgroup_trace_extent_nolock().
 *
 * NOTE: Current qgroup does the expensive backref walk at transaction
 * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
 * new transaction.
 * This is designed to allow btrfs_find_all_roots() to get correct new_roots
 * result.
 *
 * However for old_roots there is no need to do backref walk at that time,
 * since we search commit roots to walk backref and result will always be
 * correct.
 *
 * Due to the nature of no lock version, we can't do backref there.
 * So we must call btrfs_qgroup_trace_extent_post() after exiting
 * spinlock context.
 *
 * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
 * using current root, then we can move all expensive backref walk out of
 * transaction committing, but not now as qgroup accounting will be wrong again.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord);
| 177 | |
| 178 | /* |
Qu Wenruo | 50b3e04 | 2016-10-18 09:31:27 +0800 | [diff] [blame] | 179 | * Inform qgroup to trace one dirty extent, specified by @bytenr and |
| 180 | * @num_bytes. |
| 181 | * So qgroup can account it at commit trans time. |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 182 | * |
Qu Wenruo | fb235dc | 2017-02-15 10:43:03 +0800 | [diff] [blame] | 183 | * Better encapsulated version, with memory allocation and backref walk for |
| 184 | * commit roots. |
| 185 | * So this can sleep. |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 186 | * |
| 187 | * Return 0 if the operation is done. |
| 188 | * Return <0 for error, like memory allocation failure or invalid parameter |
| 189 | * (NULL trans) |
| 190 | */ |
Qu Wenruo | 50b3e04 | 2016-10-18 09:31:27 +0800 | [diff] [blame] | 191 | int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 192 | struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, |
| 193 | gfp_t gfp_flag); |
| 194 | |
/*
 * Inform qgroup to trace all leaf items of data.
 *
 * Return 0 for success
 * Return <0 for error (ENOMEM)
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct extent_buffer *eb);
| 204 | /* |
| 205 | * Inform qgroup to trace a whole subtree, including all its child tree |
| 206 | * blocks and data. |
| 207 | * The root tree block is specified by @root_eb. |
| 208 | * |
| 209 | * Normally used by relocation(tree block swap) and subvolume deletion. |
| 210 | * |
| 211 | * Return 0 for success |
| 212 | * Return <0 for error(ENOMEM or tree search error) |
| 213 | */ |
| 214 | int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, |
| 215 | struct btrfs_root *root, |
| 216 | struct extent_buffer *root_eb, |
| 217 | u64 root_gen, int root_level); |
Qu Wenruo | 442244c | 2015-04-16 17:18:36 +0800 | [diff] [blame] | 218 | int |
| 219 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, |
| 220 | struct btrfs_fs_info *fs_info, |
| 221 | u64 bytenr, u64 num_bytes, |
| 222 | struct ulist *old_roots, struct ulist *new_roots); |
Qu Wenruo | 550d7a2 | 2015-04-16 15:37:33 +0800 | [diff] [blame] | 223 | int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, |
| 224 | struct btrfs_fs_info *fs_info); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 225 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, |
| 226 | struct btrfs_fs_info *fs_info); |
| 227 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, |
| 228 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, |
| 229 | struct btrfs_qgroup_inherit *inherit); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 230 | void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, |
| 231 | u64 ref_root, u64 num_bytes); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 232 | static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info, |
| 233 | u64 ref_root, u64 num_bytes) |
| 234 | { |
Jeff Mahoney | bc07452 | 2016-06-09 17:27:55 -0400 | [diff] [blame] | 235 | trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes); |
Qu Wenruo | d51ea5d | 2017-03-13 15:52:09 +0800 | [diff] [blame] | 236 | btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 237 | } |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 238 | |
/* Only declared when the btrfs sanity tests are configured in. */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl);
#endif
| 243 | |
Qu Wenruo | 5247255 | 2015-10-12 16:05:40 +0800 | [diff] [blame] | 244 | /* New io_tree based accurate qgroup reserve API */ |
| 245 | int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len); |
Qu Wenruo | f695fdc | 2015-10-12 16:28:06 +0800 | [diff] [blame] | 246 | int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len); |
| 247 | int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len); |
Qu Wenruo | 55eeaf0 | 2015-09-08 17:08:38 +0800 | [diff] [blame] | 248 | |
Jeff Mahoney | 003d7c5 | 2017-01-25 09:50:33 -0500 | [diff] [blame] | 249 | int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
| 250 | bool enforce); |
Qu Wenruo | 55eeaf0 | 2015-09-08 17:08:38 +0800 | [diff] [blame] | 251 | void btrfs_qgroup_free_meta_all(struct btrfs_root *root); |
| 252 | void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes); |
Qu Wenruo | 56fa9d0 | 2015-10-13 09:53:10 +0800 | [diff] [blame] | 253 | void btrfs_qgroup_check_reserved_leak(struct inode *inode); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 254 | #endif /* __BTRFS_QGROUP__ */ |