blob: eb453506facd09c42a75fff553e60a150e1686fa [file] [log] [blame]
Josef Bacik0f9dd462008-09-23 13:14:11 -04001/*
2 * Copyright (C) 2008 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
Josef Bacik96303082009-07-13 21:29:25 -040019#include <linux/pagemap.h>
Josef Bacik0f9dd462008-09-23 13:14:11 -040020#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090021#include <linux/slab.h>
Josef Bacik96303082009-07-13 21:29:25 -040022#include <linux/math64.h>
Josef Bacik6ab60602011-08-08 08:24:46 -040023#include <linux/ratelimit.h>
Josef Bacik0f9dd462008-09-23 13:14:11 -040024#include "ctree.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040025#include "free-space-cache.h"
26#include "transaction.h"
Josef Bacik0af3d002010-06-21 14:48:16 -040027#include "disk-io.h"
Josef Bacik43be2142011-04-01 14:55:00 +000028#include "extent_io.h"
Li Zefan581bb052011-04-20 10:06:11 +080029#include "inode-map.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040030
Josef Bacik96303082009-07-13 21:29:25 -040031#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
32#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
Li Zefan34d52cb2011-03-29 13:46:06 +080034static int link_free_space(struct btrfs_free_space_ctl *ctl,
Josef Bacik0cb59c92010-07-02 12:14:14 -040035 struct btrfs_free_space *info);
36
/*
 * Look up the free space cache inode whose header item is keyed at
 * (BTRFS_FREE_SPACE_OBJECTID, 0, @offset) — @offset is the block group
 * start (or the free-ino cache offset).
 *
 * Returns the inode with an elevated reference count, or an ERR_PTR
 * (-ENOENT if no header item / bad inode, or the search/iget error).
 * @path is released before returning on every path.
 */
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		/* No header item -> no cache inode for this offset. */
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	/* The header stores the key of the cache inode itself. */
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	/*
	 * Strip __GFP_FS from this mapping's allocation mask so page cache
	 * allocations for the space cache cannot recurse into fs reclaim.
	 */
	mapping_set_gfp_mask(inode->i_mapping,
			mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);

	return inode;
}
83
/*
 * Get the free space cache inode for @block_group, preferring the
 * reference cached on the block group itself.
 *
 * Old-style cache inodes (missing NODATASUM|NODATACOW) are converted in
 * place: the flags are added and the block group is flagged
 * BTRFS_DC_CLEAR so the cache contents get rewritten.
 *
 * Returns the inode (referenced) or an ERR_PTR from the lookup.
 */
struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	/* Fast path: reuse the inode already cached on the block group. */
	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		printk(KERN_INFO "Old style space inode found, converting.\n");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		/* Old-format contents are unusable; force a rewrite. */
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	/* Cache one long-lived reference on the block group. */
	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}
119
/*
 * Create a free space cache inode with objectid @ino plus its header
 * item keyed at (BTRFS_FREE_SPACE_OBJECTID, 0, @offset).
 *
 * Returns 0 on success or a negative errno from the item insertions.
 */
int __create_free_space_inode(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path, u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	/* Fill the freshly inserted (zeroed) inode item. */
	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	/* Now insert the header item that points back at the inode's key. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}
179
Li Zefan0414efa2011-04-20 10:20:14 +0800180int create_free_space_inode(struct btrfs_root *root,
181 struct btrfs_trans_handle *trans,
182 struct btrfs_block_group_cache *block_group,
183 struct btrfs_path *path)
184{
185 int ret;
186 u64 ino;
187
188 ret = btrfs_find_free_objectid(root, &ino);
189 if (ret < 0)
190 return ret;
191
192 return __create_free_space_inode(root, trans, path, ino,
193 block_group->key.objectid);
194}
195
/*
 * Truncate the free space cache inode to zero length so it can be
 * rewritten from scratch.
 *
 * Metadata reservation is temporarily switched to the global block rsv;
 * if it doesn't hold enough reserved bytes we return -ENOSPC before
 * touching anything.  trans->block_rsv is restored on every path.
 * A truncate failure aborts the transaction.
 */
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	struct btrfs_block_rsv *rsv;
	u64 needed_bytes;
	loff_t oldsize;
	int ret = 0;

	/* Borrow the global rsv; remember the caller's rsv to restore. */
	rsv = trans->block_rsv;
	trans->block_rsv = &root->fs_info->global_block_rsv;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
		btrfs_calc_trans_metadata_size(root, 1);

	spin_lock(&trans->block_rsv->lock);
	if (trans->block_rsv->reserved < needed_bytes) {
		spin_unlock(&trans->block_rsv->lock);
		trans->block_rsv = rsv;
		return -ENOSPC;
	}
	spin_unlock(&trans->block_rsv->lock);

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);

	if (ret) {
		trans->block_rsv = rsv;
		btrfs_abort_transaction(trans, root, ret);
		return ret;
	}

	ret = btrfs_update_inode(trans, root, inode);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
	trans->block_rsv = rsv;

	return ret;
}
245
Josef Bacik9d66e232010-08-25 16:54:15 -0400246static int readahead_cache(struct inode *inode)
247{
248 struct file_ra_state *ra;
249 unsigned long last_index;
250
251 ra = kzalloc(sizeof(*ra), GFP_NOFS);
252 if (!ra)
253 return -ENOMEM;
254
255 file_ra_state_init(ra, inode->i_mapping);
256 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
257
258 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
259
260 kfree(ra);
261
262 return 0;
263}
264
/*
 * Cursor state for streaming the free space cache to/from its page
 * cache pages; entries are packed page by page and "cur" walks within
 * the currently kmap()ed page.
 */
struct io_ctl {
	void *cur, *orig;	/* write/read cursor, and start, of mapped page */
	struct page *page;	/* the page currently kmap()ed */
	struct page **pages;	/* all pages covering the cache inode */
	struct btrfs_root *root;
	unsigned long size;	/* bytes remaining in the current page */
	int index;		/* next slot in pages[] to map */
	int num_pages;
	unsigned check_crcs:1;	/* inline per-page crcs (all but free-ino cache) */
};
275
276static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
277 struct btrfs_root *root)
278{
279 memset(io_ctl, 0, sizeof(struct io_ctl));
280 io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
281 PAGE_CACHE_SHIFT;
282 io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
283 GFP_NOFS);
284 if (!io_ctl->pages)
285 return -ENOMEM;
286 io_ctl->root = root;
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400287 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
288 io_ctl->check_crcs = 1;
Josef Bacika67509c2011-10-05 15:18:58 -0400289 return 0;
290}
291
/* Release the page-pointer array allocated by io_ctl_init(). */
static void io_ctl_free(struct io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
}
296
/* Drop the kmap of the current page, if one is mapped. */
static void io_ctl_unmap_page(struct io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		kunmap(io_ctl->page);
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}
305
/*
 * kmap the next page and reset the cursor to its start; @clear zeroes
 * the page first (used on the write path).  The previous page must
 * already be unmapped (WARN_ON), and a page must remain (BUG_ON).
 */
static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
	WARN_ON(io_ctl->cur);
	BUG_ON(io_ctl->index >= io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = kmap(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_CACHE_SIZE;
	if (clear)
		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}
317
318static void io_ctl_drop_pages(struct io_ctl *io_ctl)
319{
320 int i;
321
322 io_ctl_unmap_page(io_ctl);
323
324 for (i = 0; i < io_ctl->num_pages; i++) {
Li Zefana1ee5a42012-01-09 14:27:42 +0800325 if (io_ctl->pages[i]) {
326 ClearPageChecked(io_ctl->pages[i]);
327 unlock_page(io_ctl->pages[i]);
328 page_cache_release(io_ctl->pages[i]);
329 }
Josef Bacika67509c2011-10-05 15:18:58 -0400330 }
331}
332
/*
 * Find or create (locked) all pages of the cache inode and stash them
 * in io_ctl->pages.  With @uptodate set, pages not yet up to date are
 * read in from disk first.  On failure everything acquired so far is
 * dropped.  Returns 0, -ENOMEM or -EIO.
 */
static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			/* btrfs_readpage unlocks the page; relock and check. */
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	/* All pages acquired; make them ready for writeback bookkeeping. */
	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}
366
/*
 * Map the first page and write the transaction generation just past the
 * header area (the per-page crc array, or a single u64 when crcs are
 * disabled).  Leaves the cursor positioned after the generation.
 */
static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
{
	__le64 *val;

	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas.  If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
}
389
/*
 * Verify the generation stamped in the (already mapped) first page
 * matches @generation.  Advances the cursor past the crc/header area
 * and the generation itself; unmaps the page and returns -EIO on
 * mismatch, 0 otherwise.
 */
static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
	__le64 *gen;

	/*
	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		printk_ratelimited(KERN_ERR "btrfs: space cache generation "
				   "(%Lu) does not match inode (%Lu)\n", *gen,
				   generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}
418
/*
 * Checksum the currently mapped page and store the crc in slot @index
 * of the crc array at the front of page 0.  Page 0 itself is csummed
 * starting just past that array.  Always unmaps the current page; a
 * no-crc cache only unmaps.
 */
static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
	kunmap(io_ctl->pages[0]);
}
442
/*
 * Map the page at slot @index and verify it against the stored crc in
 * page 0's crc array.  On mismatch the page is unmapped again and -EIO
 * is returned; on success the page stays mapped and 0 is returned.
 */
static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	/* Page 0 is csummed from just past the crc array itself. */
	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;
	kunmap(io_ctl->pages[0]);

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	if (val != crc) {
		printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
				   "space cache\n");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}
475
/*
 * Append one entry record (extent, or bitmap descriptor when @bitmap is
 * non-NULL) at the write cursor.  When the page can no longer hold a
 * full record, its crc is finalized and the next page is mapped.
 * Returns -ENOSPC when no page is mapped (cache is full), 0 otherwise.
 */
static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	/* Page is full: seal its crc before moving on. */
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}
505
/*
 * Write a full-page bitmap payload.  Bitmaps always occupy a whole
 * page, so a partially filled page is crc'd and closed out first.
 * Returns -ENOSPC when the cache runs out of pages, 0 otherwise.
 */
static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}
528
/*
 * Seal the current page and then zero-fill (and crc) every remaining
 * page so no stale data is written to disk.
 */
static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}
545
/*
 * Read one entry record at the cursor into @entry/@type.  When no page
 * is currently mapped, the next page's crc is verified first (which
 * maps it).  The page is unmapped once its last record is consumed.
 * Returns 0 or a crc-check error.
 */
static int io_ctl_read_entry(struct io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}
572
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400573static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
574 struct btrfs_free_space *entry)
Josef Bacika67509c2011-10-05 15:18:58 -0400575{
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400576 int ret;
577
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400578 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
579 if (ret)
580 return ret;
581
Josef Bacika67509c2011-10-05 15:18:58 -0400582 memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
583 io_ctl_unmap_page(io_ctl);
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400584
585 return 0;
Josef Bacika67509c2011-10-05 15:18:58 -0400586}
587
/*
 * Load the on-disk free space cache keyed at @offset from @inode into
 * @ctl.
 *
 * Returns 1 when the cache was fully loaded, 0 when there was nothing
 * usable (empty/missing/stale cache), and a negative errno on setup
 * failure.  On a corrupt cache the entries loaded so far are torn down
 * via the free_cache path before returning.
 */
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	/* Pull the entry/bitmap counts and generation from the header. */
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	/* A cache written by a different transaction is stale. */
	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu)\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return ret;

	ret = readahead_cache(inode);
	if (ret)
		goto out;

	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	/* First pass: read all entry records (extents + bitmap headers). */
	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e)
			goto free_cache;

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		/* A zero-length entry means the cache is corrupt. */
		if (!e->bytes) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			BUG_ON(!num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
			if (!e->bitmap) {
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			/* Bitmap payloads follow the records; read later. */
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * We add the bitmaps at the end of the entries in order that
	 * the bitmap entries are added to the cache.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}
739
Li Zefan0414efa2011-04-20 10:20:14 +0800740int load_free_space_cache(struct btrfs_fs_info *fs_info,
741 struct btrfs_block_group_cache *block_group)
Josef Bacik0cb59c92010-07-02 12:14:14 -0400742{
Li Zefan34d52cb2011-03-29 13:46:06 +0800743 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Li Zefan0414efa2011-04-20 10:20:14 +0800744 struct btrfs_root *root = fs_info->tree_root;
745 struct inode *inode;
746 struct btrfs_path *path;
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400747 int ret = 0;
Li Zefan0414efa2011-04-20 10:20:14 +0800748 bool matched;
749 u64 used = btrfs_block_group_used(&block_group->item);
750
751 /*
Li Zefan0414efa2011-04-20 10:20:14 +0800752 * If this block group has been marked to be cleared for one reason or
753 * another then we can't trust the on disk cache, so just return.
754 */
755 spin_lock(&block_group->lock);
756 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
757 spin_unlock(&block_group->lock);
758 return 0;
759 }
760 spin_unlock(&block_group->lock);
761
762 path = btrfs_alloc_path();
763 if (!path)
764 return 0;
Josef Bacikd53ba472012-04-12 16:03:57 -0400765 path->search_commit_root = 1;
766 path->skip_locking = 1;
Li Zefan0414efa2011-04-20 10:20:14 +0800767
768 inode = lookup_free_space_inode(root, block_group, path);
769 if (IS_ERR(inode)) {
770 btrfs_free_path(path);
771 return 0;
772 }
773
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400774 /* We may have converted the inode and made the cache invalid. */
775 spin_lock(&block_group->lock);
776 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
777 spin_unlock(&block_group->lock);
Tsutomu Itoha7e221e2012-02-14 17:12:23 +0900778 btrfs_free_path(path);
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400779 goto out;
780 }
781 spin_unlock(&block_group->lock);
782
Li Zefan0414efa2011-04-20 10:20:14 +0800783 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
784 path, block_group->key.objectid);
785 btrfs_free_path(path);
786 if (ret <= 0)
787 goto out;
788
789 spin_lock(&ctl->tree_lock);
790 matched = (ctl->free_space == (block_group->key.offset - used -
791 block_group->bytes_super));
792 spin_unlock(&ctl->tree_lock);
793
794 if (!matched) {
795 __btrfs_remove_free_space_cache(ctl);
796 printk(KERN_ERR "block group %llu has an wrong amount of free "
797 "space\n", block_group->key.objectid);
798 ret = -1;
799 }
800out:
801 if (ret < 0) {
802 /* This cache is bogus, make sure it gets cleared */
803 spin_lock(&block_group->lock);
804 block_group->disk_cache_state = BTRFS_DC_CLEAR;
805 spin_unlock(&block_group->lock);
Li Zefan82d59022011-04-20 10:33:24 +0800806 ret = 0;
Li Zefan0414efa2011-04-20 10:20:14 +0800807
808 printk(KERN_ERR "btrfs: failed to load free space cache "
809 "for block group %llu\n", block_group->key.objectid);
810 }
811
812 iput(inode);
813 return ret;
814}
815
Josef Bacikc09544e2011-08-30 10:19:10 -0400816/**
817 * __btrfs_write_out_cache - write out cached info to an inode
818 * @root - the root the inode belongs to
819 * @ctl - the free space cache we are going to write out
820 * @block_group - the block_group for this cache if it belongs to a block_group
821 * @trans - the trans handle
822 * @path - the path to use
823 * @offset - the offset for the key we'll insert
824 *
825 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
827 * and -1 if it was not.
828 */
Li Zefan0414efa2011-04-20 10:20:14 +0800829int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
830 struct btrfs_free_space_ctl *ctl,
831 struct btrfs_block_group_cache *block_group,
832 struct btrfs_trans_handle *trans,
833 struct btrfs_path *path, u64 offset)
Josef Bacik0cb59c92010-07-02 12:14:14 -0400834{
835 struct btrfs_free_space_header *header;
836 struct extent_buffer *leaf;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400837 struct rb_node *node;
838 struct list_head *pos, *n;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400839 struct extent_state *cached_state = NULL;
Josef Bacik43be2142011-04-01 14:55:00 +0000840 struct btrfs_free_cluster *cluster = NULL;
841 struct extent_io_tree *unpin = NULL;
Josef Bacika67509c2011-10-05 15:18:58 -0400842 struct io_ctl io_ctl;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400843 struct list_head bitmap_list;
844 struct btrfs_key key;
Li Zefandb804f22012-01-10 16:41:01 +0800845 u64 start, extent_start, extent_end, len;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400846 int entries = 0;
847 int bitmaps = 0;
Josef Bacikc09544e2011-08-30 10:19:10 -0400848 int ret;
849 int err = -1;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400850
Josef Bacik0cb59c92010-07-02 12:14:14 -0400851 INIT_LIST_HEAD(&bitmap_list);
852
Li Zefan0414efa2011-04-20 10:20:14 +0800853 if (!i_size_read(inode))
854 return -1;
Josef Bacik2b209822010-12-03 13:17:53 -0500855
Li Zefan706efc62012-01-09 14:36:28 +0800856 ret = io_ctl_init(&io_ctl, inode, root);
857 if (ret)
858 return -1;
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400859
Josef Bacik43be2142011-04-01 14:55:00 +0000860 /* Get the cluster for this block_group if it exists */
Li Zefan0414efa2011-04-20 10:20:14 +0800861 if (block_group && !list_empty(&block_group->cluster_list))
Josef Bacik43be2142011-04-01 14:55:00 +0000862 cluster = list_entry(block_group->cluster_list.next,
863 struct btrfs_free_cluster,
864 block_group_list);
865
Josef Bacika67509c2011-10-05 15:18:58 -0400866 /* Lock all pages first so we can lock the extent safely. */
867 io_ctl_prepare_pages(&io_ctl, inode, 0);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400868
Josef Bacik0cb59c92010-07-02 12:14:14 -0400869 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
Jeff Mahoneyd0082372012-03-01 14:57:19 +0100870 0, &cached_state);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400871
Josef Bacikf75b1302011-10-05 10:00:18 -0400872 node = rb_first(&ctl->free_space_offset);
873 if (!node && cluster) {
874 node = rb_first(&cluster->root);
875 cluster = NULL;
876 }
877
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400878 /* Make sure we can fit our crcs into the first page */
879 if (io_ctl.check_crcs &&
880 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
881 WARN_ON(1);
882 goto out_nospc;
883 }
884
Josef Bacika67509c2011-10-05 15:18:58 -0400885 io_ctl_set_generation(&io_ctl, trans->transid);
886
Josef Bacik0cb59c92010-07-02 12:14:14 -0400887 /* Write out the extent entries */
Josef Bacika67509c2011-10-05 15:18:58 -0400888 while (node) {
889 struct btrfs_free_space *e;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400890
Josef Bacika67509c2011-10-05 15:18:58 -0400891 e = rb_entry(node, struct btrfs_free_space, offset_index);
892 entries++;
Josef Bacik43be2142011-04-01 14:55:00 +0000893
Josef Bacika67509c2011-10-05 15:18:58 -0400894 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
895 e->bitmap);
896 if (ret)
897 goto out_nospc;
898
899 if (e->bitmap) {
900 list_add_tail(&e->list, &bitmap_list);
901 bitmaps++;
902 }
903 node = rb_next(node);
904 if (!node && cluster) {
905 node = rb_first(&cluster->root);
906 cluster = NULL;
907 }
908 }
909
910 /*
911 * We want to add any pinned extents to our free space cache
912 * so we don't leak the space
913 */
Li Zefandb804f22012-01-10 16:41:01 +0800914
915 /*
916 * We shouldn't have switched the pinned extents yet so this is the
917 * right one
918 */
919 unpin = root->fs_info->pinned_extents;
920
921 if (block_group)
922 start = block_group->key.objectid;
923
Josef Bacika67509c2011-10-05 15:18:58 -0400924 while (block_group && (start < block_group->key.objectid +
925 block_group->key.offset)) {
Li Zefandb804f22012-01-10 16:41:01 +0800926 ret = find_first_extent_bit(unpin, start,
927 &extent_start, &extent_end,
Josef Bacika67509c2011-10-05 15:18:58 -0400928 EXTENT_DIRTY);
929 if (ret) {
930 ret = 0;
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400931 break;
932 }
933
Josef Bacika67509c2011-10-05 15:18:58 -0400934 /* This pinned extent is out of our range */
Li Zefandb804f22012-01-10 16:41:01 +0800935 if (extent_start >= block_group->key.objectid +
Josef Bacika67509c2011-10-05 15:18:58 -0400936 block_group->key.offset)
937 break;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400938
Li Zefandb804f22012-01-10 16:41:01 +0800939 extent_start = max(extent_start, start);
940 extent_end = min(block_group->key.objectid +
941 block_group->key.offset, extent_end + 1);
942 len = extent_end - extent_start;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400943
Josef Bacika67509c2011-10-05 15:18:58 -0400944 entries++;
Li Zefandb804f22012-01-10 16:41:01 +0800945 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
Josef Bacika67509c2011-10-05 15:18:58 -0400946 if (ret)
947 goto out_nospc;
Josef Bacik2f356122011-06-10 15:31:13 -0400948
Li Zefandb804f22012-01-10 16:41:01 +0800949 start = extent_end;
Josef Bacika67509c2011-10-05 15:18:58 -0400950 }
Josef Bacik0cb59c92010-07-02 12:14:14 -0400951
952 /* Write out the bitmaps */
953 list_for_each_safe(pos, n, &bitmap_list) {
Josef Bacik0cb59c92010-07-02 12:14:14 -0400954 struct btrfs_free_space *entry =
955 list_entry(pos, struct btrfs_free_space, list);
956
Josef Bacika67509c2011-10-05 15:18:58 -0400957 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
958 if (ret)
959 goto out_nospc;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400960 list_del_init(&entry->list);
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400961 }
962
Josef Bacik0cb59c92010-07-02 12:14:14 -0400963 /* Zero out the rest of the pages just to make sure */
Josef Bacika67509c2011-10-05 15:18:58 -0400964 io_ctl_zero_remaining_pages(&io_ctl);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400965
Josef Bacika67509c2011-10-05 15:18:58 -0400966 ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
967 0, i_size_read(inode), &cached_state);
968 io_ctl_drop_pages(&io_ctl);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400969 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
970 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
971
Josef Bacikc09544e2011-08-30 10:19:10 -0400972 if (ret)
Josef Bacik2f356122011-06-10 15:31:13 -0400973 goto out;
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400974
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400975
Josef Bacik549b4fd2011-10-05 16:33:53 -0400976 ret = filemap_write_and_wait(inode->i_mapping);
977 if (ret)
978 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400979
980 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
Li Zefan0414efa2011-04-20 10:20:14 +0800981 key.offset = offset;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400982 key.type = 0;
983
Josef Bacika9b5fcd2011-08-19 12:06:12 -0400984 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400985 if (ret < 0) {
Josef Bacika67509c2011-10-05 15:18:58 -0400986 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400987 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
988 GFP_NOFS);
Josef Bacik2f356122011-06-10 15:31:13 -0400989 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400990 }
991 leaf = path->nodes[0];
992 if (ret > 0) {
993 struct btrfs_key found_key;
994 BUG_ON(!path->slots[0]);
995 path->slots[0]--;
996 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
997 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
Li Zefan0414efa2011-04-20 10:20:14 +0800998 found_key.offset != offset) {
Josef Bacika67509c2011-10-05 15:18:58 -0400999 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1000 inode->i_size - 1,
Josef Bacik5b0e95b2011-10-06 08:58:24 -04001001 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1002 NULL, GFP_NOFS);
David Sterbab3b4aa72011-04-21 01:20:15 +02001003 btrfs_release_path(path);
Josef Bacik2f356122011-06-10 15:31:13 -04001004 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -04001005 }
1006 }
Josef Bacik549b4fd2011-10-05 16:33:53 -04001007
1008 BTRFS_I(inode)->generation = trans->transid;
Josef Bacik0cb59c92010-07-02 12:14:14 -04001009 header = btrfs_item_ptr(leaf, path->slots[0],
1010 struct btrfs_free_space_header);
1011 btrfs_set_free_space_entries(leaf, header, entries);
1012 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1013 btrfs_set_free_space_generation(leaf, header, trans->transid);
1014 btrfs_mark_buffer_dirty(leaf);
David Sterbab3b4aa72011-04-21 01:20:15 +02001015 btrfs_release_path(path);
Josef Bacik0cb59c92010-07-02 12:14:14 -04001016
Josef Bacikc09544e2011-08-30 10:19:10 -04001017 err = 0;
Josef Bacik2f356122011-06-10 15:31:13 -04001018out:
Josef Bacika67509c2011-10-05 15:18:58 -04001019 io_ctl_free(&io_ctl);
Josef Bacikc09544e2011-08-30 10:19:10 -04001020 if (err) {
Josef Bacika67509c2011-10-05 15:18:58 -04001021 invalidate_inode_pages2(inode->i_mapping);
Josef Bacik0cb59c92010-07-02 12:14:14 -04001022 BTRFS_I(inode)->generation = 0;
1023 }
Josef Bacik0cb59c92010-07-02 12:14:14 -04001024 btrfs_update_inode(trans, root, inode);
Josef Bacikc09544e2011-08-30 10:19:10 -04001025 return err;
Josef Bacika67509c2011-10-05 15:18:58 -04001026
1027out_nospc:
1028 list_for_each_safe(pos, n, &bitmap_list) {
1029 struct btrfs_free_space *entry =
1030 list_entry(pos, struct btrfs_free_space, list);
1031 list_del_init(&entry->list);
1032 }
1033 io_ctl_drop_pages(&io_ctl);
1034 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1035 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1036 goto out;
Li Zefan0414efa2011-04-20 10:20:14 +08001037}
1038
1039int btrfs_write_out_cache(struct btrfs_root *root,
1040 struct btrfs_trans_handle *trans,
1041 struct btrfs_block_group_cache *block_group,
1042 struct btrfs_path *path)
1043{
1044 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1045 struct inode *inode;
1046 int ret = 0;
1047
1048 root = root->fs_info->tree_root;
1049
1050 spin_lock(&block_group->lock);
1051 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1052 spin_unlock(&block_group->lock);
1053 return 0;
1054 }
1055 spin_unlock(&block_group->lock);
1056
1057 inode = lookup_free_space_inode(root, block_group, path);
1058 if (IS_ERR(inode))
1059 return 0;
1060
1061 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1062 path, block_group->key.objectid);
Josef Bacikc09544e2011-08-30 10:19:10 -04001063 if (ret) {
Li Zefan0414efa2011-04-20 10:20:14 +08001064 spin_lock(&block_group->lock);
1065 block_group->disk_cache_state = BTRFS_DC_ERROR;
1066 spin_unlock(&block_group->lock);
Li Zefan82d59022011-04-20 10:33:24 +08001067 ret = 0;
Josef Bacikc09544e2011-08-30 10:19:10 -04001068#ifdef DEBUG
Masanari Iida934e7d42012-02-07 22:21:45 +09001069 printk(KERN_ERR "btrfs: failed to write free space cache "
Li Zefan0414efa2011-04-20 10:20:14 +08001070 "for block group %llu\n", block_group->key.objectid);
Josef Bacikc09544e2011-08-30 10:19:10 -04001071#endif
Li Zefan0414efa2011-04-20 10:20:14 +08001072 }
1073
Josef Bacik0cb59c92010-07-02 12:14:14 -04001074 iput(inode);
1075 return ret;
1076}
1077
Li Zefan34d52cb2011-03-29 13:46:06 +08001078static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
Josef Bacik96303082009-07-13 21:29:25 -04001079 u64 offset)
1080{
1081 BUG_ON(offset < bitmap_start);
1082 offset -= bitmap_start;
Li Zefan34d52cb2011-03-29 13:46:06 +08001083 return (unsigned long)(div_u64(offset, unit));
Josef Bacik96303082009-07-13 21:29:25 -04001084}
1085
Li Zefan34d52cb2011-03-29 13:46:06 +08001086static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
Josef Bacik96303082009-07-13 21:29:25 -04001087{
Li Zefan34d52cb2011-03-29 13:46:06 +08001088 return (unsigned long)(div_u64(bytes, unit));
Josef Bacik96303082009-07-13 21:29:25 -04001089}
1090
Li Zefan34d52cb2011-03-29 13:46:06 +08001091static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001092 u64 offset)
1093{
1094 u64 bitmap_start;
1095 u64 bytes_per_bitmap;
1096
Li Zefan34d52cb2011-03-29 13:46:06 +08001097 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1098 bitmap_start = offset - ctl->start;
Josef Bacik96303082009-07-13 21:29:25 -04001099 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1100 bitmap_start *= bytes_per_bitmap;
Li Zefan34d52cb2011-03-29 13:46:06 +08001101 bitmap_start += ctl->start;
Josef Bacik96303082009-07-13 21:29:25 -04001102
1103 return bitmap_start;
1104}
Josef Bacik0f9dd462008-09-23 13:14:11 -04001105
1106static int tree_insert_offset(struct rb_root *root, u64 offset,
Josef Bacik96303082009-07-13 21:29:25 -04001107 struct rb_node *node, int bitmap)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001108{
1109 struct rb_node **p = &root->rb_node;
1110 struct rb_node *parent = NULL;
1111 struct btrfs_free_space *info;
1112
1113 while (*p) {
1114 parent = *p;
1115 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1116
Josef Bacik96303082009-07-13 21:29:25 -04001117 if (offset < info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001118 p = &(*p)->rb_left;
Josef Bacik96303082009-07-13 21:29:25 -04001119 } else if (offset > info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001120 p = &(*p)->rb_right;
Josef Bacik96303082009-07-13 21:29:25 -04001121 } else {
1122 /*
1123 * we could have a bitmap entry and an extent entry
1124 * share the same offset. If this is the case, we want
1125 * the extent entry to always be found first if we do a
1126 * linear search through the tree, since we want to have
1127 * the quickest allocation time, and allocating from an
1128 * extent is faster than allocating from a bitmap. So
1129 * if we're inserting a bitmap and we find an entry at
1130 * this offset, we want to go right, or after this entry
1131 * logically. If we are inserting an extent and we've
1132 * found a bitmap, we want to go left, or before
1133 * logically.
1134 */
1135 if (bitmap) {
Josef Bacik207dde82011-05-13 14:49:23 -04001136 if (info->bitmap) {
1137 WARN_ON_ONCE(1);
1138 return -EEXIST;
1139 }
Josef Bacik96303082009-07-13 21:29:25 -04001140 p = &(*p)->rb_right;
1141 } else {
Josef Bacik207dde82011-05-13 14:49:23 -04001142 if (!info->bitmap) {
1143 WARN_ON_ONCE(1);
1144 return -EEXIST;
1145 }
Josef Bacik96303082009-07-13 21:29:25 -04001146 p = &(*p)->rb_left;
1147 }
1148 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04001149 }
1150
1151 rb_link_node(node, parent, p);
1152 rb_insert_color(node, root);
1153
1154 return 0;
1155}
1156
/*
 * searches the tree for the given offset.
 *
 * bitmap_only - If this is set, only a bitmap entry whose offset is exactly
 * 'offset' is returned; NULL otherwise.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				/* stop at the first extent entry before us */
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	/* no exact match was found; 'prev' is the last node we visited */
	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			/* nothing before 'offset'; fuzzy search may still use it */
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		/* prefer an extent entry that still covers 'offset', if any */
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		/* the bitmap itself covers 'offset' if it spans that far */
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	/* fuzzy: walk forward to the first entry ending after 'offset' */
	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
1289
Li Zefanf333adb2010-11-09 14:57:39 +08001290static inline void
Li Zefan34d52cb2011-03-29 13:46:06 +08001291__unlink_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001292 struct btrfs_free_space *info)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001293{
Li Zefan34d52cb2011-03-29 13:46:06 +08001294 rb_erase(&info->offset_index, &ctl->free_space_offset);
1295 ctl->free_extents--;
Li Zefanf333adb2010-11-09 14:57:39 +08001296}
1297
Li Zefan34d52cb2011-03-29 13:46:06 +08001298static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001299 struct btrfs_free_space *info)
1300{
Li Zefan34d52cb2011-03-29 13:46:06 +08001301 __unlink_free_space(ctl, info);
1302 ctl->free_space -= info->bytes;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001303}
1304
Li Zefan34d52cb2011-03-29 13:46:06 +08001305static int link_free_space(struct btrfs_free_space_ctl *ctl,
Josef Bacik0f9dd462008-09-23 13:14:11 -04001306 struct btrfs_free_space *info)
1307{
1308 int ret = 0;
1309
Josef Bacik96303082009-07-13 21:29:25 -04001310 BUG_ON(!info->bitmap && !info->bytes);
Li Zefan34d52cb2011-03-29 13:46:06 +08001311 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
Josef Bacik96303082009-07-13 21:29:25 -04001312 &info->offset_index, (info->bitmap != NULL));
Josef Bacik0f9dd462008-09-23 13:14:11 -04001313 if (ret)
1314 return ret;
1315
Li Zefan34d52cb2011-03-29 13:46:06 +08001316 ctl->free_space += info->bytes;
1317 ctl->free_extents++;
Josef Bacik96303082009-07-13 21:29:25 -04001318 return ret;
1319}
1320
/*
 * Recompute ctl->extents_thresh: how many extent entries we allow before
 * new free space is tracked in bitmaps instead, keeping the cache's memory
 * use near MAX_CACHE_BYTES_PER_GIG per gigabyte of block group.
 */
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->key.offset;
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(ctl->total_bitmaps > max_bitmaps);

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	if (size < 1024 * 1024 * 1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	/* bitmaps already eat the whole budget; allow no extent entries */
	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}
1366
Miao Xiebb3ac5a2011-08-05 09:32:35 +00001367static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1368 struct btrfs_free_space *info,
1369 u64 offset, u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001370{
Li Zefanf38b6e72011-03-14 13:40:51 +08001371 unsigned long start, count;
Josef Bacik96303082009-07-13 21:29:25 -04001372
Li Zefan34d52cb2011-03-29 13:46:06 +08001373 start = offset_to_bit(info->offset, ctl->unit, offset);
1374 count = bytes_to_bits(bytes, ctl->unit);
Li Zefanf38b6e72011-03-14 13:40:51 +08001375 BUG_ON(start + count > BITS_PER_BITMAP);
Josef Bacik96303082009-07-13 21:29:25 -04001376
Li Zefanf38b6e72011-03-14 13:40:51 +08001377 bitmap_clear(info->bitmap, start, count);
Josef Bacik96303082009-07-13 21:29:25 -04001378
1379 info->bytes -= bytes;
Miao Xiebb3ac5a2011-08-05 09:32:35 +00001380}
1381
1382static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1383 struct btrfs_free_space *info, u64 offset,
1384 u64 bytes)
1385{
1386 __bitmap_clear_bits(ctl, info, offset, bytes);
Li Zefan34d52cb2011-03-29 13:46:06 +08001387 ctl->free_space -= bytes;
Josef Bacik96303082009-07-13 21:29:25 -04001388}
1389
Li Zefan34d52cb2011-03-29 13:46:06 +08001390static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
Josef Bacik817d52f2009-07-13 21:29:25 -04001391 struct btrfs_free_space *info, u64 offset,
1392 u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001393{
Li Zefanf38b6e72011-03-14 13:40:51 +08001394 unsigned long start, count;
Josef Bacik96303082009-07-13 21:29:25 -04001395
Li Zefan34d52cb2011-03-29 13:46:06 +08001396 start = offset_to_bit(info->offset, ctl->unit, offset);
1397 count = bytes_to_bits(bytes, ctl->unit);
Li Zefanf38b6e72011-03-14 13:40:51 +08001398 BUG_ON(start + count > BITS_PER_BITMAP);
Josef Bacik96303082009-07-13 21:29:25 -04001399
Li Zefanf38b6e72011-03-14 13:40:51 +08001400 bitmap_set(info->bitmap, start, count);
Josef Bacik96303082009-07-13 21:29:25 -04001401
1402 info->bytes += bytes;
Li Zefan34d52cb2011-03-29 13:46:06 +08001403 ctl->free_space += bytes;
Josef Bacik96303082009-07-13 21:29:25 -04001404}
1405
/*
 * Search @bitmap_info for a run of set bits at least *@bytes long, starting
 * at or after *@offset.  On success, *@offset and *@bytes are updated to the
 * start and full length of the run found and 0 is returned; -1 otherwise.
 */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	/* start scanning at *offset, clamped to the bitmap's own start */
	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	/* hop from one set bit to the next, measuring each run of set bits */
	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}
1438
Li Zefan34d52cb2011-03-29 13:46:06 +08001439static struct btrfs_free_space *
1440find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001441{
1442 struct btrfs_free_space *entry;
1443 struct rb_node *node;
1444 int ret;
1445
Li Zefan34d52cb2011-03-29 13:46:06 +08001446 if (!ctl->free_space_offset.rb_node)
Josef Bacik96303082009-07-13 21:29:25 -04001447 return NULL;
1448
Li Zefan34d52cb2011-03-29 13:46:06 +08001449 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
Josef Bacik96303082009-07-13 21:29:25 -04001450 if (!entry)
1451 return NULL;
1452
1453 for (node = &entry->offset_index; node; node = rb_next(node)) {
1454 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1455 if (entry->bytes < *bytes)
1456 continue;
1457
1458 if (entry->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001459 ret = search_bitmap(ctl, entry, offset, bytes);
Josef Bacik96303082009-07-13 21:29:25 -04001460 if (!ret)
1461 return entry;
1462 continue;
1463 }
1464
1465 *offset = entry->offset;
1466 *bytes = entry->bytes;
1467 return entry;
1468 }
1469
1470 return NULL;
1471}
1472
Li Zefan34d52cb2011-03-29 13:46:06 +08001473static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001474 struct btrfs_free_space *info, u64 offset)
1475{
Li Zefan34d52cb2011-03-29 13:46:06 +08001476 info->offset = offset_to_bitmap(ctl, offset);
Josef Bacikf019f422009-09-11 16:11:20 -04001477 info->bytes = 0;
Alexandre Olivaf2d0f672011-11-28 12:04:43 -02001478 INIT_LIST_HEAD(&info->list);
Li Zefan34d52cb2011-03-29 13:46:06 +08001479 link_free_space(ctl, info);
1480 ctl->total_bitmaps++;
Josef Bacik96303082009-07-13 21:29:25 -04001481
Li Zefan34d52cb2011-03-29 13:46:06 +08001482 ctl->op->recalc_thresholds(ctl);
Josef Bacik96303082009-07-13 21:29:25 -04001483}
1484
Li Zefan34d52cb2011-03-29 13:46:06 +08001485static void free_bitmap(struct btrfs_free_space_ctl *ctl,
Li Zefanedf6e2d2010-11-09 14:50:07 +08001486 struct btrfs_free_space *bitmap_info)
1487{
Li Zefan34d52cb2011-03-29 13:46:06 +08001488 unlink_free_space(ctl, bitmap_info);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001489 kfree(bitmap_info->bitmap);
Josef Bacikdc89e982011-01-28 17:05:48 -05001490 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
Li Zefan34d52cb2011-03-29 13:46:06 +08001491 ctl->total_bitmaps--;
1492 ctl->op->recalc_thresholds(ctl);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001493}
1494
/*
 * Remove [*offset, *offset + *bytes) from the bitmap(s) that cover it.
 *
 * Returns 0 when the whole range was cleared, -EAGAIN when the remainder
 * of the range is not covered by the next bitmap (so the caller must fall
 * back to the extent entry code and retry), and -EINVAL when bytes remain
 * but no entry follows this bitmap at all.
 */
static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *bitmap_info,
			       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	/* last byte covered by this bitmap */
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case somethings not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure its actually there.  If its not there then we need to stop
	 * because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		/* range spills past this bitmap: clear what we can, advance */
		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		/* range fits entirely within this bitmap */
		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		/* grab the successor before we possibly free this entry */
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}
1572
Josef Bacik2cdc3422011-05-27 14:07:49 -04001573static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1574 struct btrfs_free_space *info, u64 offset,
1575 u64 bytes)
1576{
1577 u64 bytes_to_set = 0;
1578 u64 end;
1579
1580 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1581
1582 bytes_to_set = min(end - offset, bytes);
1583
1584 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1585
1586 return bytes_to_set;
1587
1588}
1589
/*
 * Decide whether a free space range should be stored in a bitmap rather
 * than as a plain extent entry.  Returns true when a bitmap should be
 * used, false when the range should stay a regular extent.
 */
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		      struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them to larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
			/* small extent: only bitmap it once half the slots are used */
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return false;

	return true;
}
1625
/*
 * Callbacks used when a free space ctl caches a block group's free space
 * (as opposed to e.g. the inode map); installed by
 * btrfs_init_free_space_ctl() below.
 */
static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds = recalculate_thresholds,
	.use_bitmap = use_bitmap,
};
1630
/*
 * Try to store the range described by 'info' in bitmap form.
 *
 * Returns 1 when the whole range went into one or more bitmaps ('info' is
 * consumed and freed), 0 when use_bitmap() decided the range should stay a
 * plain extent (caller keeps ownership of 'info'), or a negative errno on
 * allocation failure (in which case 'info' has also been freed).
 *
 * Called with ctl->tree_lock held; the lock is dropped and re-taken around
 * the GFP_NOFS allocations in the new_bitmap path.
 */
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	/* only block-group ctls have a cluster list to consult */
	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
	 */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		struct btrfs_free_cluster *cluster;
		struct rb_node *node;
		struct btrfs_free_space *entry;

		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
		spin_lock(&cluster->lock);
		node = rb_first(&cluster->root);
		if (!node) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!entry->bitmap) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		/* the cluster's bitmap covers our offset: add what fits */
		if (entry->offset == offset_to_bitmap(ctl, offset)) {
			bytes_added = add_bytes_to_bitmap(ctl, entry,
							  offset, bytes);
			bytes -= bytes_added;
			offset += bytes_added;
		}
		spin_unlock(&cluster->lock);
		if (!bytes) {
			ret = 1;
			goto out;
		}
	}

no_cluster_bitmap:
	/* look for an existing bitmap covering this offset */
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		/* a freshly added bitmap must cover 'offset' */
		BUG_ON(added);
		goto new_bitmap;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
	bytes -= bytes_added;
	offset += bytes_added;
	added = 0;

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		/* reuse the pre-allocated info + bitmap page */
		add_new_bitmap(ctl, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		/* must drop the lock to allocate */
		spin_unlock(&ctl->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kmem_cache_zalloc(btrfs_free_space_cachep,
						 GFP_NOFS);
			if (!info) {
				spin_lock(&ctl->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&ctl->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	/* any leftover pre-allocated info is ours to free */
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kmem_cache_free(btrfs_free_space_cachep, info);
	}

	return ret;
}
1746
Chris Mason945d8962011-05-22 12:33:42 -04001747static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001748 struct btrfs_free_space *info, bool update_stat)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001749{
Li Zefan120d66e2010-11-09 14:56:50 +08001750 struct btrfs_free_space *left_info;
1751 struct btrfs_free_space *right_info;
1752 bool merged = false;
1753 u64 offset = info->offset;
1754 u64 bytes = info->bytes;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001755
Josef Bacik0f9dd462008-09-23 13:14:11 -04001756 /*
1757 * first we want to see if there is free space adjacent to the range we
1758 * are adding, if there is remove that struct and add a new one to
1759 * cover the entire range
1760 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001761 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
Josef Bacik96303082009-07-13 21:29:25 -04001762 if (right_info && rb_prev(&right_info->offset_index))
1763 left_info = rb_entry(rb_prev(&right_info->offset_index),
1764 struct btrfs_free_space, offset_index);
1765 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001766 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001767
Josef Bacik96303082009-07-13 21:29:25 -04001768 if (right_info && !right_info->bitmap) {
Li Zefanf333adb2010-11-09 14:57:39 +08001769 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08001770 unlink_free_space(ctl, right_info);
Li Zefanf333adb2010-11-09 14:57:39 +08001771 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001772 __unlink_free_space(ctl, right_info);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001773 info->bytes += right_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05001774 kmem_cache_free(btrfs_free_space_cachep, right_info);
Li Zefan120d66e2010-11-09 14:56:50 +08001775 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001776 }
1777
Josef Bacik96303082009-07-13 21:29:25 -04001778 if (left_info && !left_info->bitmap &&
1779 left_info->offset + left_info->bytes == offset) {
Li Zefanf333adb2010-11-09 14:57:39 +08001780 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08001781 unlink_free_space(ctl, left_info);
Li Zefanf333adb2010-11-09 14:57:39 +08001782 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001783 __unlink_free_space(ctl, left_info);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001784 info->offset = left_info->offset;
1785 info->bytes += left_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05001786 kmem_cache_free(btrfs_free_space_cachep, left_info);
Li Zefan120d66e2010-11-09 14:56:50 +08001787 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001788 }
1789
Li Zefan120d66e2010-11-09 14:56:50 +08001790 return merged;
1791}
1792
Li Zefan581bb052011-04-20 10:06:11 +08001793int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1794 u64 offset, u64 bytes)
Li Zefan120d66e2010-11-09 14:56:50 +08001795{
1796 struct btrfs_free_space *info;
1797 int ret = 0;
1798
Josef Bacikdc89e982011-01-28 17:05:48 -05001799 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
Li Zefan120d66e2010-11-09 14:56:50 +08001800 if (!info)
1801 return -ENOMEM;
1802
1803 info->offset = offset;
1804 info->bytes = bytes;
1805
Li Zefan34d52cb2011-03-29 13:46:06 +08001806 spin_lock(&ctl->tree_lock);
Li Zefan120d66e2010-11-09 14:56:50 +08001807
Li Zefan34d52cb2011-03-29 13:46:06 +08001808 if (try_merge_free_space(ctl, info, true))
Li Zefan120d66e2010-11-09 14:56:50 +08001809 goto link;
1810
1811 /*
1812 * There was no extent directly to the left or right of this new
1813 * extent then we know we're going to have to allocate a new extent, so
1814 * before we do that see if we need to drop this into a bitmap
1815 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001816 ret = insert_into_bitmap(ctl, info);
Li Zefan120d66e2010-11-09 14:56:50 +08001817 if (ret < 0) {
1818 goto out;
1819 } else if (ret) {
1820 ret = 0;
1821 goto out;
1822 }
1823link:
Li Zefan34d52cb2011-03-29 13:46:06 +08001824 ret = link_free_space(ctl, info);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001825 if (ret)
Josef Bacikdc89e982011-01-28 17:05:48 -05001826 kmem_cache_free(btrfs_free_space_cachep, info);
Josef Bacik96303082009-07-13 21:29:25 -04001827out:
Li Zefan34d52cb2011-03-29 13:46:06 +08001828 spin_unlock(&ctl->tree_lock);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001829
Josef Bacik0f9dd462008-09-23 13:14:11 -04001830 if (ret) {
Josef Bacik96303082009-07-13 21:29:25 -04001831 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04001832 BUG_ON(ret == -EEXIST);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001833 }
1834
Josef Bacik0f9dd462008-09-23 13:14:11 -04001835 return ret;
1836}
1837
/*
 * Remove the range [offset, offset + bytes) from the block group's free
 * space cache.  The range may live in a plain extent entry, in a bitmap,
 * or straddle adjacent entries.  Returns 0 on success, -EINVAL when the
 * cached entries do not actually cover the requested range.
 */
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&ctl->tree_lock);

again:
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			/* the tree logging code might be calling us before we
			 * have fully loaded the free space rbtree for this
			 * block group.  So it is possible the entry won't
			 * be in the rbtree yet at all.  The caching code
			 * will make sure not to put it in the rbtree if
			 * the logging code has pinned it.
			 */
			goto out_lock;
		}
	}

	/*
	 * The entry found is too small; the remainder of the range must be
	 * in the next entry.  Sanity-check that the next entry really covers
	 * 'offset' before switching to it.
	 */
	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
					     struct btrfs_free_space,
					     offset_index);

		if (next_info->bitmap)
			end = next_info->offset +
			      BITS_PER_BITMAP * ctl->unit - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	/* exact-size match: drop the whole entry (extent or bitmap) */
	if (info->bytes == bytes) {
		unlink_free_space(ctl, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			ctl->total_bitmaps--;
		}
		kmem_cache_free(btrfs_free_space_cachep, info);
		ret = 0;
		goto out_lock;
	}

	/* removing from the front of a plain extent: just shrink it */
	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(ctl, info);
		info->offset += bytes;
		info->bytes -= bytes;
		ret = link_free_space(ctl, info);
		WARN_ON(ret);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(ctl, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(ctl, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kmem_cache_free(btrfs_free_space_cachep, info);
		}
		/* drop the lock: btrfs_add_free_space takes it itself */
		spin_unlock(&ctl->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret); /* -ENOMEM */
		goto out;
	}

	/* range is in a bitmap; -EAGAIN means part of it spilled into the
	 * next entry, so retry the lookup with the updated offset/bytes */
	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret); /* logic error */
out_lock:
	spin_unlock(&ctl->tree_lock);
out:
	return ret;
}
1962
Josef Bacik0f9dd462008-09-23 13:14:11 -04001963void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1964 u64 bytes)
1965{
Li Zefan34d52cb2011-03-29 13:46:06 +08001966 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001967 struct btrfs_free_space *info;
1968 struct rb_node *n;
1969 int count = 0;
1970
Li Zefan34d52cb2011-03-29 13:46:06 +08001971 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001972 info = rb_entry(n, struct btrfs_free_space, offset_index);
1973 if (info->bytes >= bytes)
1974 count++;
Josef Bacik96303082009-07-13 21:29:25 -04001975 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
Joel Becker21380932009-04-21 12:38:29 -07001976 (unsigned long long)info->offset,
Josef Bacik96303082009-07-13 21:29:25 -04001977 (unsigned long long)info->bytes,
1978 (info->bitmap) ? "yes" : "no");
Josef Bacik0f9dd462008-09-23 13:14:11 -04001979 }
Josef Bacik96303082009-07-13 21:29:25 -04001980 printk(KERN_INFO "block group has cluster?: %s\n",
1981 list_empty(&block_group->cluster_list) ? "no" : "yes");
Josef Bacik0f9dd462008-09-23 13:14:11 -04001982 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
1983 "\n", count);
1984}
1985
Li Zefan34d52cb2011-03-29 13:46:06 +08001986void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001987{
Li Zefan34d52cb2011-03-29 13:46:06 +08001988 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001989
Li Zefan34d52cb2011-03-29 13:46:06 +08001990 spin_lock_init(&ctl->tree_lock);
1991 ctl->unit = block_group->sectorsize;
1992 ctl->start = block_group->key.objectid;
1993 ctl->private = block_group;
1994 ctl->op = &free_space_op;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001995
Li Zefan34d52cb2011-03-29 13:46:06 +08001996 /*
1997 * we only want to have 32k of ram per block group for keeping
1998 * track of free space, and if we pass 1/2 of that we want to
1999 * start converting things over to using bitmaps
2000 */
2001 ctl->extents_thresh = ((1024 * 32) / 2) /
2002 sizeof(struct btrfs_free_space);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002003}
2004
/*
 * for a given cluster, put all of its extents back into the free
 * space cache. If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already. In that case, we just return without changing anything
 *
 * Called with ctl->tree_lock held; takes cluster->lock and drops the
 * caller's reference on the block group.  Always returns 0.
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	/* detach the cluster from the block group */
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);

	/* move every entry from the cluster's rbtree back into the ctl's */
	node = rb_first(&cluster->root);
	while (node) {
		bool bitmap;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);

		bitmap = (entry->bitmap != NULL);
		if (!bitmap)
			try_merge_free_space(ctl, entry, false);
		tree_insert_offset(&ctl->free_space_offset,
				   entry->offset, &entry->offset_index, bitmap);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}
2049
/*
 * Free every entry (extent and bitmap) in the ctl's free space tree.
 * Caller holds ctl->tree_lock; the lock is briefly dropped when
 * rescheduling is needed, so the tree must not be relied on to stay
 * unchanged across this call by anyone else.
 */
void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			/* frees both the bitmap page and the entry */
			free_bitmap(ctl, info);
		}
		/* be nice to the scheduler on large trees */
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
}
2070
/* Locking wrapper: empty the ctl's free space tree under tree_lock. */
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	spin_lock(&ctl->tree_lock);
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}
2077
/*
 * Tear down a block group's entire free space cache: first return every
 * cluster's extents to the cache, then free all cached entries.  The
 * tree_lock is dropped periodically to allow rescheduling.
 */
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&ctl->tree_lock);
	/* re-read the list head each pass: returning a cluster unlinks it */
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);

}
2102
/*
 * Allocate 'bytes' of free space from the block group's cache, searching
 * from 'offset' for at least bytes + empty_size of contiguous space.
 * Returns the start offset of the allocation, or 0 if nothing suitable
 * was found.
 */
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&ctl->tree_lock);
	/* on success, 'offset' is updated to the start of the found space */
	entry = find_free_space(ctl, &offset, &bytes_search);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(ctl, entry, offset, bytes);
		if (!entry->bytes)
			free_bitmap(ctl, entry);
	} else {
		/* trim the allocation off the front of the extent */
		unlink_free_space(ctl, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	}

out:
	spin_unlock(&ctl->tree_lock);

	return ret;
}
Chris Masonfa9c0d792009-04-03 09:47:43 -04002136
/*
 * given a cluster, put all of its extents back into the free space
 * cache. If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl;
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			/* cluster is already detached, nothing to do */
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	/* pin the block group while we work without the cluster lock */
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	ctl = block_group->free_space_ctl;

	/* now return any extents the cluster had on it */
	spin_lock(&ctl->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&ctl->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}
2179
Josef Bacik96303082009-07-13 21:29:25 -04002180static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2181 struct btrfs_free_cluster *cluster,
Josef Bacik4e69b592011-03-21 10:11:24 -04002182 struct btrfs_free_space *entry,
Josef Bacik96303082009-07-13 21:29:25 -04002183 u64 bytes, u64 min_start)
2184{
Li Zefan34d52cb2011-03-29 13:46:06 +08002185 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik96303082009-07-13 21:29:25 -04002186 int err;
2187 u64 search_start = cluster->window_start;
2188 u64 search_bytes = bytes;
2189 u64 ret = 0;
2190
Josef Bacik96303082009-07-13 21:29:25 -04002191 search_start = min_start;
2192 search_bytes = bytes;
2193
Li Zefan34d52cb2011-03-29 13:46:06 +08002194 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
Josef Bacik96303082009-07-13 21:29:25 -04002195 if (err)
Josef Bacik4e69b592011-03-21 10:11:24 -04002196 return 0;
Josef Bacik96303082009-07-13 21:29:25 -04002197
2198 ret = search_start;
Miao Xiebb3ac5a2011-08-05 09:32:35 +00002199 __bitmap_clear_bits(ctl, entry, ret, bytes);
Josef Bacik96303082009-07-13 21:29:25 -04002200
2201 return ret;
2202}
2203
/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	/* walk the cluster's entries until one can satisfy the request */
	while(1) {
		if (entry->bytes < bytes ||
		    (!entry->bitmap && entry->offset < min_start)) {
			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}

		if (entry->bitmap) {
			ret = btrfs_alloc_from_bitmap(block_group,
						      cluster, entry, bytes,
						      cluster->window_start);
			if (ret == 0) {
				/* bitmap couldn't satisfy us, try the next entry */
				node = rb_next(&entry->offset_index);
				if (!node)
					break;
				entry = rb_entry(node, struct btrfs_free_space,
						 offset_index);
				continue;
			}
			cluster->window_start += bytes;
		} else {
			/* plain extent: carve the allocation off the front */
			ret = entry->offset;

			entry->offset += bytes;
			entry->bytes -= bytes;
		}

		if (entry->bytes == 0)
			rb_erase(&entry->offset_index, &cluster->root);
		break;
	}
out:
	spin_unlock(&cluster->lock);

	if (!ret)
		return 0;

	/* second phase: update the ctl's accounting under its own lock */
	spin_lock(&ctl->tree_lock);

	ctl->free_space -= bytes;
	if (entry->bytes == 0) {
		/* the entry was emptied above; release it fully */
		ctl->free_extents--;
		if (entry->bitmap) {
			kfree(entry->bitmap);
			ctl->total_bitmaps--;
			ctl->op->recalc_thresholds(ctl);
		}
		kmem_cache_free(btrfs_free_space_cachep, entry);
	}

	spin_unlock(&ctl->tree_lock);

	return ret;
}
2288
Josef Bacik96303082009-07-13 21:29:25 -04002289static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2290 struct btrfs_free_space *entry,
2291 struct btrfs_free_cluster *cluster,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002292 u64 offset, u64 bytes,
2293 u64 cont1_bytes, u64 min_bytes)
Josef Bacik96303082009-07-13 21:29:25 -04002294{
Li Zefan34d52cb2011-03-29 13:46:06 +08002295 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik96303082009-07-13 21:29:25 -04002296 unsigned long next_zero;
2297 unsigned long i;
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002298 unsigned long want_bits;
2299 unsigned long min_bits;
Josef Bacik96303082009-07-13 21:29:25 -04002300 unsigned long found_bits;
2301 unsigned long start = 0;
2302 unsigned long total_found = 0;
Josef Bacik4e69b592011-03-21 10:11:24 -04002303 int ret;
Josef Bacik96303082009-07-13 21:29:25 -04002304
2305 i = offset_to_bit(entry->offset, block_group->sectorsize,
2306 max_t(u64, offset, entry->offset));
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002307 want_bits = bytes_to_bits(bytes, block_group->sectorsize);
2308 min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
Josef Bacik96303082009-07-13 21:29:25 -04002309
2310again:
2311 found_bits = 0;
2312 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2313 i < BITS_PER_BITMAP;
2314 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2315 next_zero = find_next_zero_bit(entry->bitmap,
2316 BITS_PER_BITMAP, i);
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002317 if (next_zero - i >= min_bits) {
Josef Bacik96303082009-07-13 21:29:25 -04002318 found_bits = next_zero - i;
2319 break;
2320 }
2321 i = next_zero;
2322 }
2323
2324 if (!found_bits)
Josef Bacik4e69b592011-03-21 10:11:24 -04002325 return -ENOSPC;
Josef Bacik96303082009-07-13 21:29:25 -04002326
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002327 if (!total_found) {
Josef Bacik96303082009-07-13 21:29:25 -04002328 start = i;
Alexandre Olivab78d09b2011-11-30 13:43:00 -05002329 cluster->max_size = 0;
Josef Bacik96303082009-07-13 21:29:25 -04002330 }
2331
2332 total_found += found_bits;
2333
2334 if (cluster->max_size < found_bits * block_group->sectorsize)
2335 cluster->max_size = found_bits * block_group->sectorsize;
2336
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002337 if (total_found < want_bits || cluster->max_size < cont1_bytes) {
2338 i = next_zero + 1;
Josef Bacik96303082009-07-13 21:29:25 -04002339 goto again;
2340 }
2341
2342 cluster->window_start = start * block_group->sectorsize +
2343 entry->offset;
Li Zefan34d52cb2011-03-29 13:46:06 +08002344 rb_erase(&entry->offset_index, &ctl->free_space_offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04002345 ret = tree_insert_offset(&cluster->root, entry->offset,
2346 &entry->offset_index, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002347 BUG_ON(ret); /* -EEXIST; Logic error */
Josef Bacik96303082009-07-13 21:29:25 -04002348
Josef Bacik3f7de032011-11-10 08:29:20 -05002349 trace_btrfs_setup_cluster(block_group, cluster,
2350 total_found * block_group->sectorsize, 1);
Josef Bacik96303082009-07-13 21:29:25 -04002351 return 0;
2352}
2353
Chris Masonfa9c0d792009-04-03 09:47:43 -04002354/*
Josef Bacik4e69b592011-03-21 10:11:24 -04002355 * This searches the block group for just extents to fill the cluster with.
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002356 * Try to find a cluster with at least bytes total bytes, at least one
2357 * extent of cont1_bytes, and other clusters of at least min_bytes.
Josef Bacik4e69b592011-03-21 10:11:24 -04002358 */
Josef Bacik3de85bb2011-05-25 13:07:37 -04002359static noinline int
2360setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2361 struct btrfs_free_cluster *cluster,
2362 struct list_head *bitmaps, u64 offset, u64 bytes,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002363 u64 cont1_bytes, u64 min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04002364{
Li Zefan34d52cb2011-03-29 13:46:06 +08002365 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik4e69b592011-03-21 10:11:24 -04002366 struct btrfs_free_space *first = NULL;
2367 struct btrfs_free_space *entry = NULL;
Josef Bacik4e69b592011-03-21 10:11:24 -04002368 struct btrfs_free_space *last;
2369 struct rb_node *node;
2370 u64 window_start;
2371 u64 window_free;
2372 u64 max_extent;
Josef Bacik3f7de032011-11-10 08:29:20 -05002373 u64 total_size = 0;
Josef Bacik4e69b592011-03-21 10:11:24 -04002374
Li Zefan34d52cb2011-03-29 13:46:06 +08002375 entry = tree_search_offset(ctl, offset, 0, 1);
Josef Bacik4e69b592011-03-21 10:11:24 -04002376 if (!entry)
2377 return -ENOSPC;
2378
2379 /*
2380 * We don't want bitmaps, so just move along until we find a normal
2381 * extent entry.
2382 */
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002383 while (entry->bitmap || entry->bytes < min_bytes) {
2384 if (entry->bitmap && list_empty(&entry->list))
Josef Bacik86d4a772011-05-25 13:03:16 -04002385 list_add_tail(&entry->list, bitmaps);
Josef Bacik4e69b592011-03-21 10:11:24 -04002386 node = rb_next(&entry->offset_index);
2387 if (!node)
2388 return -ENOSPC;
2389 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2390 }
2391
2392 window_start = entry->offset;
2393 window_free = entry->bytes;
2394 max_extent = entry->bytes;
2395 first = entry;
2396 last = entry;
Josef Bacik4e69b592011-03-21 10:11:24 -04002397
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002398 for (node = rb_next(&entry->offset_index); node;
2399 node = rb_next(&entry->offset_index)) {
Josef Bacik4e69b592011-03-21 10:11:24 -04002400 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2401
Josef Bacik86d4a772011-05-25 13:03:16 -04002402 if (entry->bitmap) {
2403 if (list_empty(&entry->list))
2404 list_add_tail(&entry->list, bitmaps);
Josef Bacik4e69b592011-03-21 10:11:24 -04002405 continue;
Josef Bacik86d4a772011-05-25 13:03:16 -04002406 }
2407
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002408 if (entry->bytes < min_bytes)
2409 continue;
2410
2411 last = entry;
2412 window_free += entry->bytes;
2413 if (entry->bytes > max_extent)
Josef Bacik4e69b592011-03-21 10:11:24 -04002414 max_extent = entry->bytes;
Josef Bacik4e69b592011-03-21 10:11:24 -04002415 }
2416
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002417 if (window_free < bytes || max_extent < cont1_bytes)
2418 return -ENOSPC;
2419
Josef Bacik4e69b592011-03-21 10:11:24 -04002420 cluster->window_start = first->offset;
2421
2422 node = &first->offset_index;
2423
2424 /*
2425 * now we've found our entries, pull them out of the free space
2426 * cache and put them into the cluster rbtree
2427 */
2428 do {
2429 int ret;
2430
2431 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2432 node = rb_next(&entry->offset_index);
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002433 if (entry->bitmap || entry->bytes < min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04002434 continue;
2435
Li Zefan34d52cb2011-03-29 13:46:06 +08002436 rb_erase(&entry->offset_index, &ctl->free_space_offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04002437 ret = tree_insert_offset(&cluster->root, entry->offset,
2438 &entry->offset_index, 0);
Josef Bacik3f7de032011-11-10 08:29:20 -05002439 total_size += entry->bytes;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002440 BUG_ON(ret); /* -EEXIST; Logic error */
Josef Bacik4e69b592011-03-21 10:11:24 -04002441 } while (node && entry != last);
2442
2443 cluster->max_size = max_extent;
Josef Bacik3f7de032011-11-10 08:29:20 -05002444 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
Josef Bacik4e69b592011-03-21 10:11:24 -04002445 return 0;
2446}
2447
2448/*
2449 * This specifically looks for bitmaps that may work in the cluster, we assume
2450 * that we have already failed to find extents that will work.
2451 */
Josef Bacik3de85bb2011-05-25 13:07:37 -04002452static noinline int
2453setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2454 struct btrfs_free_cluster *cluster,
2455 struct list_head *bitmaps, u64 offset, u64 bytes,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002456 u64 cont1_bytes, u64 min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04002457{
Li Zefan34d52cb2011-03-29 13:46:06 +08002458 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik4e69b592011-03-21 10:11:24 -04002459 struct btrfs_free_space *entry;
Josef Bacik4e69b592011-03-21 10:11:24 -04002460 int ret = -ENOSPC;
Li Zefan0f0fbf12011-11-20 07:33:38 -05002461 u64 bitmap_offset = offset_to_bitmap(ctl, offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04002462
Li Zefan34d52cb2011-03-29 13:46:06 +08002463 if (ctl->total_bitmaps == 0)
Josef Bacik4e69b592011-03-21 10:11:24 -04002464 return -ENOSPC;
2465
Josef Bacik86d4a772011-05-25 13:03:16 -04002466 /*
Li Zefan0f0fbf12011-11-20 07:33:38 -05002467 * The bitmap that covers offset won't be in the list unless offset
2468 * is just its start offset.
2469 */
2470 entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2471 if (entry->offset != bitmap_offset) {
2472 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2473 if (entry && list_empty(&entry->list))
2474 list_add(&entry->list, bitmaps);
2475 }
2476
Josef Bacik86d4a772011-05-25 13:03:16 -04002477 list_for_each_entry(entry, bitmaps, list) {
Josef Bacik357b9782012-01-26 15:01:11 -05002478 if (entry->bytes < bytes)
Josef Bacik86d4a772011-05-25 13:03:16 -04002479 continue;
2480 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002481 bytes, cont1_bytes, min_bytes);
Josef Bacik86d4a772011-05-25 13:03:16 -04002482 if (!ret)
2483 return 0;
2484 }
2485
2486 /*
Li Zefan52621cb2011-11-20 07:33:38 -05002487 * The bitmaps list has all the bitmaps that record free space
2488 * starting after offset, so no more search is required.
Josef Bacik86d4a772011-05-25 13:03:16 -04002489 */
Li Zefan52621cb2011-11-20 07:33:38 -05002490 return -ENOSPC;
Josef Bacik4e69b592011-03-21 10:11:24 -04002491}
2492
2493/*
Chris Masonfa9c0d792009-04-03 09:47:43 -04002494 * here we try to find a cluster of blocks in a block group. The goal
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002495 * is to find at least bytes+empty_size.
Chris Masonfa9c0d792009-04-03 09:47:43 -04002496 * We might not find them all in one contiguous area.
2497 *
2498 * returns zero and sets up cluster if things worked out, otherwise
2499 * it returns -enospc
2500 */
2501int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
Chris Mason451d7582009-06-09 20:28:34 -04002502 struct btrfs_root *root,
Chris Masonfa9c0d792009-04-03 09:47:43 -04002503 struct btrfs_block_group_cache *block_group,
2504 struct btrfs_free_cluster *cluster,
2505 u64 offset, u64 bytes, u64 empty_size)
2506{
Li Zefan34d52cb2011-03-29 13:46:06 +08002507 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik86d4a772011-05-25 13:03:16 -04002508 struct btrfs_free_space *entry, *tmp;
Li Zefan52621cb2011-11-20 07:33:38 -05002509 LIST_HEAD(bitmaps);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002510 u64 min_bytes;
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002511 u64 cont1_bytes;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002512 int ret;
2513
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002514 /*
2515 * Choose the minimum extent size we'll require for this
2516 * cluster. For SSD_SPREAD, don't allow any fragmentation.
2517 * For metadata, allow allocates with smaller extents. For
2518 * data, keep it dense.
2519 */
Chris Mason451d7582009-06-09 20:28:34 -04002520 if (btrfs_test_opt(root, SSD_SPREAD)) {
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002521 cont1_bytes = min_bytes = bytes + empty_size;
Chris Mason451d7582009-06-09 20:28:34 -04002522 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002523 cont1_bytes = bytes;
2524 min_bytes = block_group->sectorsize;
2525 } else {
2526 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
2527 min_bytes = block_group->sectorsize;
2528 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04002529
Li Zefan34d52cb2011-03-29 13:46:06 +08002530 spin_lock(&ctl->tree_lock);
Josef Bacik7d0d2e82011-03-18 15:13:42 -04002531
2532 /*
2533 * If we know we don't have enough space to make a cluster don't even
2534 * bother doing all the work to try and find one.
2535 */
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002536 if (ctl->free_space < bytes) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002537 spin_unlock(&ctl->tree_lock);
Josef Bacik7d0d2e82011-03-18 15:13:42 -04002538 return -ENOSPC;
2539 }
2540
Chris Masonfa9c0d792009-04-03 09:47:43 -04002541 spin_lock(&cluster->lock);
2542
2543 /* someone already found a cluster, hooray */
2544 if (cluster->block_group) {
2545 ret = 0;
2546 goto out;
2547 }
Josef Bacik4e69b592011-03-21 10:11:24 -04002548
Josef Bacik3f7de032011-11-10 08:29:20 -05002549 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
2550 min_bytes);
2551
2552 INIT_LIST_HEAD(&bitmaps);
Josef Bacik86d4a772011-05-25 13:03:16 -04002553 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002554 bytes + empty_size,
2555 cont1_bytes, min_bytes);
Josef Bacik4e69b592011-03-21 10:11:24 -04002556 if (ret)
Josef Bacik86d4a772011-05-25 13:03:16 -04002557 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002558 offset, bytes + empty_size,
2559 cont1_bytes, min_bytes);
Josef Bacik86d4a772011-05-25 13:03:16 -04002560
2561 /* Clear our temporary list */
2562 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2563 list_del_init(&entry->list);
Josef Bacik4e69b592011-03-21 10:11:24 -04002564
2565 if (!ret) {
2566 atomic_inc(&block_group->count);
2567 list_add_tail(&cluster->block_group_list,
2568 &block_group->cluster_list);
2569 cluster->block_group = block_group;
Josef Bacik3f7de032011-11-10 08:29:20 -05002570 } else {
2571 trace_btrfs_failed_cluster_setup(block_group);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002572 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04002573out:
2574 spin_unlock(&cluster->lock);
Li Zefan34d52cb2011-03-29 13:46:06 +08002575 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002576
2577 return ret;
2578}
2579
2580/*
2581 * simple code to zero out a cluster
2582 */
2583void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2584{
2585 spin_lock_init(&cluster->lock);
2586 spin_lock_init(&cluster->refill_lock);
Eric Paris6bef4d32010-02-23 19:43:04 +00002587 cluster->root = RB_ROOT;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002588 cluster->max_size = 0;
2589 INIT_LIST_HEAD(&cluster->block_group_list);
2590 cluster->block_group = NULL;
2591}
2592
Li Zefan7fe1e642011-12-29 14:47:27 +08002593static int do_trimming(struct btrfs_block_group_cache *block_group,
2594 u64 *total_trimmed, u64 start, u64 bytes,
2595 u64 reserved_start, u64 reserved_bytes)
2596{
2597 struct btrfs_space_info *space_info = block_group->space_info;
2598 struct btrfs_fs_info *fs_info = block_group->fs_info;
2599 int ret;
2600 int update = 0;
2601 u64 trimmed = 0;
2602
2603 spin_lock(&space_info->lock);
2604 spin_lock(&block_group->lock);
2605 if (!block_group->ro) {
2606 block_group->reserved += reserved_bytes;
2607 space_info->bytes_reserved += reserved_bytes;
2608 update = 1;
2609 }
2610 spin_unlock(&block_group->lock);
2611 spin_unlock(&space_info->lock);
2612
2613 ret = btrfs_error_discard_extent(fs_info->extent_root,
2614 start, bytes, &trimmed);
2615 if (!ret)
2616 *total_trimmed += trimmed;
2617
2618 btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
2619
2620 if (update) {
2621 spin_lock(&space_info->lock);
2622 spin_lock(&block_group->lock);
2623 if (block_group->ro)
2624 space_info->bytes_readonly += reserved_bytes;
2625 block_group->reserved -= reserved_bytes;
2626 space_info->bytes_reserved -= reserved_bytes;
2627 spin_unlock(&space_info->lock);
2628 spin_unlock(&block_group->lock);
2629 }
2630
2631 return ret;
2632}
2633
/*
 * Discard free space recorded in plain (non-bitmap) extent entries that
 * intersects [start, end).  Pieces shorter than minlen are skipped.
 * Bytes actually discarded are added to *total_trimmed.  The tree lock
 * is dropped around each discard and re-taken per iteration.
 */
static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
			  u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = 0;
	u64 extent_start;
	u64 extent_bytes;
	u64 bytes;

	while (start < end) {
		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		/* skip bitmaps */
		while (entry->bitmap) {
			node = rb_next(&entry->offset_index);
			if (!node) {
				spin_unlock(&ctl->tree_lock);
				goto out;
			}
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
		}

		if (entry->offset >= end) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		/* clamp the extent to the [start, end) window */
		extent_start = entry->offset;
		extent_bytes = entry->bytes;
		start = max(start, extent_start);
		bytes = min(extent_start + extent_bytes, end) - start;
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		/*
		 * Remove the whole extent from the cache while the discard
		 * is in flight; do_trimming() re-adds it afterwards.
		 */
		unlink_free_space(ctl, entry);
		kmem_cache_free(btrfs_free_space_cachep, entry);

		spin_unlock(&ctl->tree_lock);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  extent_start, extent_bytes);
		if (ret)
			break;
next:
		start += bytes;

		/* fstrim can cover a lot of space; stay interruptible */
		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}
out:
	return ret;
}
2706
/*
 * Discard free space recorded in bitmap entries that intersects
 * [start, end), one bitmap-sized window at a time.  Runs shorter than
 * minlen are skipped; discarded bytes are added to *total_trimmed.
 */
static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
			u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	int ret = 0;
	int ret2;
	u64 bytes;
	/* offset always points at the start of the current bitmap window */
	u64 offset = offset_to_bitmap(ctl, start);

	while (offset < end) {
		bool next_bitmap = false;

		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, offset, 1, 0);
		if (!entry) {
			/* no bitmap covers this window, move to the next one */
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		bytes = minlen;
		/* find the next free run of at least minlen at/after start */
		ret2 = search_bitmap(ctl, entry, &start, &bytes);
		if (ret2 || start >= end) {
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		bytes = min(bytes, end - start);
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		/* pull the run out of the cache while the discard runs */
		bitmap_clear_bits(ctl, entry, start, bytes);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);

		spin_unlock(&ctl->tree_lock);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  start, bytes);
		if (ret)
			break;
next:
		if (next_bitmap) {
			offset += BITS_PER_BITMAP * ctl->unit;
		} else {
			start += bytes;
			/* advance the window once start passes its end */
			if (start >= offset + BITS_PER_BITMAP * ctl->unit)
				offset += BITS_PER_BITMAP * ctl->unit;
		}

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}
2777
2778int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2779 u64 *trimmed, u64 start, u64 end, u64 minlen)
2780{
2781 int ret;
2782
2783 *trimmed = 0;
2784
2785 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
2786 if (ret)
2787 return ret;
2788
2789 ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
Li Dongyangf7039b12011-03-24 10:24:28 +00002790
2791 return ret;
2792}
Li Zefan581bb052011-04-20 10:06:11 +08002793
2794/*
2795 * Find the left-most item in the cache tree, and then return the
2796 * smallest inode number in the item.
2797 *
2798 * Note: the returned inode number may not be the smallest one in
2799 * the tree, if the left-most item is a bitmap.
2800 */
2801u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2802{
2803 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2804 struct btrfs_free_space *entry = NULL;
2805 u64 ino = 0;
2806
2807 spin_lock(&ctl->tree_lock);
2808
2809 if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2810 goto out;
2811
2812 entry = rb_entry(rb_first(&ctl->free_space_offset),
2813 struct btrfs_free_space, offset_index);
2814
2815 if (!entry->bitmap) {
2816 ino = entry->offset;
2817
2818 unlink_free_space(ctl, entry);
2819 entry->offset++;
2820 entry->bytes--;
2821 if (!entry->bytes)
2822 kmem_cache_free(btrfs_free_space_cachep, entry);
2823 else
2824 link_free_space(ctl, entry);
2825 } else {
2826 u64 offset = 0;
2827 u64 count = 1;
2828 int ret;
2829
2830 ret = search_bitmap(ctl, entry, &offset, &count);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002831 /* Logic error; Should be empty if it can't find anything */
Li Zefan581bb052011-04-20 10:06:11 +08002832 BUG_ON(ret);
2833
2834 ino = offset;
2835 bitmap_clear_bits(ctl, entry, offset, 1);
2836 if (entry->bytes == 0)
2837 free_bitmap(ctl, entry);
2838 }
2839out:
2840 spin_unlock(&ctl->tree_lock);
2841
2842 return ino;
2843}
Li Zefan82d59022011-04-20 10:33:24 +08002844
/*
 * Return the inode that backs the free inode number cache for @root,
 * holding a reference (caller must iput()).  The cached
 * root->cache_inode is used when available; otherwise the inode is
 * looked up on disk and, unless the fs is closing, cached for next
 * time.  Returns an ERR_PTR on lookup failure.
 */
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&root->cache_lock);
	if (root->cache_inode)
		inode = igrab(root->cache_inode);
	spin_unlock(&root->cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->cache_lock);
	/* don't cache the inode while unmounting */
	if (!btrfs_fs_closing(root->fs_info))
		root->cache_inode = igrab(inode);
	spin_unlock(&root->cache_lock);

	return inode;
}
2868
/*
 * Create the on-disk inode that backs the free inode number cache for
 * this root (objectid BTRFS_FREE_INO_OBJECTID).
 */
int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path)
{
	return __create_free_space_inode(root, trans, path,
					 BTRFS_FREE_INO_OBJECTID, 0);
}
2876
/*
 * Load the free inode number cache for @root from its cache inode.
 * Skipped (returning 0) when the inode map cache is disabled, the fs is
 * shutting down, or the cached generation doesn't match the root's.
 * Otherwise returns the result of __load_free_space_cache().
 */
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	/* a stale cache (older generation) must not be trusted */
	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to load free ino cache for "
		       "root %llu\n", root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}
2917
/*
 * Write the in-memory free inode number cache for @root to its cache
 * inode.  Returns 0 when the cache is disabled or its inode can't be
 * looked up; otherwise returns the result of __btrfs_write_out_cache().
 */
int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct inode *inode;
	int ret;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
	if (ret) {
		/* drop the metadata reservation taken for the cache write */
		btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free ino cache "
		       "for root %llu\n", root->root_key.objectid);
#endif
	}

	iput(inode);
	return ret;
}