blob: 054707ed57914ab97b2f8a5e2e1e9f9679884bba [file] [log] [blame]
Josef Bacik0f9dd462008-09-23 13:14:11 -04001/*
2 * Copyright (C) 2008 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
Josef Bacik96303082009-07-13 21:29:25 -040019#include <linux/pagemap.h>
Josef Bacik0f9dd462008-09-23 13:14:11 -040020#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090021#include <linux/slab.h>
Josef Bacik96303082009-07-13 21:29:25 -040022#include <linux/math64.h>
Josef Bacik6ab60602011-08-08 08:24:46 -040023#include <linux/ratelimit.h>
Josef Bacik0f9dd462008-09-23 13:14:11 -040024#include "ctree.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040025#include "free-space-cache.h"
26#include "transaction.h"
Josef Bacik0af3d002010-06-21 14:48:16 -040027#include "disk-io.h"
Josef Bacik43be2142011-04-01 14:55:00 +000028#include "extent_io.h"
Li Zefan581bb052011-04-20 10:06:11 +080029#include "inode-map.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040030
Josef Bacik96303082009-07-13 21:29:25 -040031#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
32#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
Li Zefan34d52cb2011-03-29 13:46:06 +080034static int link_free_space(struct btrfs_free_space_ctl *ctl,
Josef Bacik0cb59c92010-07-02 12:14:14 -040035 struct btrfs_free_space *info);
36
/*
 * Look up the free space header item keyed (FREE_SPACE_OBJECTID, 0, @offset)
 * and iget the inode it points at.
 *
 * Returns the inode on success, or an ERR_PTR: the search error, or -ENOENT
 * if the header item is missing or the inode is unusable.  @path is released
 * before returning.
 */
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		/* No free space header for this block group yet. */
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	/* The header item embeds the key of the cache inode itself. */
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	/*
	 * Strip __GFP_FS from this mapping's allocation mask so page cache
	 * allocations for the space cache never recurse into the filesystem.
	 */
	inode->i_mapping->flags &= ~__GFP_FS;

	return inode;
}
82
/*
 * Return the free space cache inode for @block_group, taking it from the
 * block group's cached reference when available, otherwise looking it up on
 * disk and stashing a reference in the block group.
 *
 * Old-style inodes missing NODATASUM|NODATACOW are converted in place and
 * the on-disk cache is invalidated (disk_cache_state = BTRFS_DC_CLEAR) so it
 * gets rewritten in the new format.
 */
struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	/* Fast path: the block group already holds a reference. */
	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		printk(KERN_INFO "Old style space inode found, converting.\n");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		/* The existing on-disk cache is in the old format; rebuild it. */
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	/* Cache one reference on the block group for subsequent lookups. */
	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}
118
/*
 * Create a free space cache inode with objectid @ino plus its free space
 * header item keyed (FREE_SPACE_OBJECTID, 0, @offset).
 *
 * Returns 0 on success or a negative errno.  @path is released before
 * returning on all paths.
 */
int __create_free_space_inode(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path, u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	/* Fill in the freshly inserted (empty) inode item. */
	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	/* Now insert the header item that points back at the inode. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}
178
Li Zefan0414efa2011-04-20 10:20:14 +0800179int create_free_space_inode(struct btrfs_root *root,
180 struct btrfs_trans_handle *trans,
181 struct btrfs_block_group_cache *block_group,
182 struct btrfs_path *path)
183{
184 int ret;
185 u64 ino;
186
187 ret = btrfs_find_free_objectid(root, &ino);
188 if (ret < 0)
189 return ret;
190
191 return __create_free_space_inode(root, trans, path, ino,
192 block_group->key.objectid);
193}
194
/*
 * Truncate the free space cache @inode to zero so it can be rewritten.
 *
 * Temporarily borrows from the global block reserve; fails with -ENOSPC
 * up front if the reserve cannot cover the truncate plus the inode update.
 * The caller's trans->block_rsv is restored on every exit path.  A failed
 * truncate or inode update aborts the transaction.
 */
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	struct btrfs_block_rsv *rsv;	/* caller's reserve, restored on exit */
	u64 needed_bytes;
	loff_t oldsize;
	int ret = 0;

	rsv = trans->block_rsv;
	trans->block_rsv = &root->fs_info->global_block_rsv;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
		btrfs_calc_trans_metadata_size(root, 1);

	/* Bail early if the global reserve can't cover us. */
	spin_lock(&trans->block_rsv->lock);
	if (trans->block_rsv->reserved < needed_bytes) {
		spin_unlock(&trans->block_rsv->lock);
		trans->block_rsv = rsv;
		return -ENOSPC;
	}
	spin_unlock(&trans->block_rsv->lock);

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);

	if (ret) {
		trans->block_rsv = rsv;
		btrfs_abort_transaction(trans, root, ret);
		return ret;
	}

	ret = btrfs_update_inode(trans, root, inode);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
	trans->block_rsv = rsv;

	return ret;
}
244
Josef Bacik9d66e232010-08-25 16:54:15 -0400245static int readahead_cache(struct inode *inode)
246{
247 struct file_ra_state *ra;
248 unsigned long last_index;
249
250 ra = kzalloc(sizeof(*ra), GFP_NOFS);
251 if (!ra)
252 return -ENOMEM;
253
254 file_ra_state_init(ra, inode->i_mapping);
255 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
256
257 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
258
259 kfree(ra);
260
261 return 0;
262}
263
/*
 * Cursor state for streaming the free space cache to/from its backing
 * page cache pages, one kmap()ed page at a time.
 */
struct io_ctl {
	void *cur, *orig;	/* current position / start of the mapped page */
	struct page *page;	/* the page currently kmap()ed */
	struct page **pages;	/* all pages backing the cache inode */
	struct btrfs_root *root;
	unsigned long size;	/* bytes remaining in the current page */
	int index;		/* next page to map */
	int num_pages;
	unsigned check_crcs:1;	/* inline per-page crcs (off for the free-ino cache) */
};
274
275static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
276 struct btrfs_root *root)
277{
278 memset(io_ctl, 0, sizeof(struct io_ctl));
279 io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
280 PAGE_CACHE_SHIFT;
281 io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
282 GFP_NOFS);
283 if (!io_ctl->pages)
284 return -ENOMEM;
285 io_ctl->root = root;
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400286 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
287 io_ctl->check_crcs = 1;
Josef Bacika67509c2011-10-05 15:18:58 -0400288 return 0;
289}
290
/* Release the page-pointer array allocated by io_ctl_init(). */
static void io_ctl_free(struct io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
}
295
296static void io_ctl_unmap_page(struct io_ctl *io_ctl)
297{
298 if (io_ctl->cur) {
299 kunmap(io_ctl->page);
300 io_ctl->cur = NULL;
301 io_ctl->orig = NULL;
302 }
303}
304
/*
 * kmap the next page and reset the cursor to its start.  @clear zeroes the
 * page (used when writing).  A page must still be available and nothing may
 * currently be mapped.
 */
static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
	WARN_ON(io_ctl->cur);
	BUG_ON(io_ctl->index >= io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = kmap(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_CACHE_SIZE;
	if (clear)
		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}
316
/*
 * Unmap the current page and unlock/release every page acquired by
 * io_ctl_prepare_pages().  Safe to call with a partially-filled array
 * (NULL slots are skipped).
 */
static void io_ctl_drop_pages(struct io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			page_cache_release(io_ctl->pages[i]);
		}
	}
}
331
/*
 * Find-or-create (and lock) every page of the cache inode.  With @uptodate
 * set, read each page in and fail with -EIO if it doesn't come uptodate
 * (used on the load path).  On failure all pages taken so far are dropped.
 */
static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			/* btrfs_readpage unlocks the page; re-lock and re-check. */
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	/* All pages in hand: make them clean and extent-mapped for I/O. */
	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}
365
/*
 * Map the first page and write the cache generation just past the crc
 * area (or past a single reserved u64 when crcs are disabled).  Leaves
 * the cursor positioned after the generation for the first entries.
 */
static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *val;

	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas.  If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
}
388
/*
 * Verify the on-disk generation (stored just past the crc area on the
 * first, already-mapped page) matches @generation.  Returns 0 and leaves
 * the cursor after the generation, or -EIO on mismatch (page unmapped).
 */
static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *gen;

	/*
	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		printk_ratelimited(KERN_ERR "btrfs: space cache generation "
				   "(%Lu) does not match inode (%Lu)\n", *gen,
				   generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}
417
/*
 * Checksum the page at @index and store the crc into the u32 crc array at
 * the front of page 0.  Page 0's own crc skips that array.  Always unmaps
 * the current page; a no-op crc-wise when check_crcs is off.
 */
static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	/* Page 0: don't checksum the crc array itself. */
	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
	kunmap(io_ctl->pages[0]);
}
441
/*
 * Map the next page and verify its crc against the stored value in the
 * crc array on page 0 (page 0's crc excludes the array itself).  Returns
 * 0 with the page mapped, or -EIO (page unmapped) on mismatch.  When
 * check_crcs is off this just maps the page.
 */
static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	/* Fetch the stored crc for this page from page 0. */
	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;
	kunmap(io_ctl->pages[0]);

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	if (val != crc) {
		printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
				   "space cache\n");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}
474
/*
 * Append one free space entry (extent, or bitmap header when @bitmap is
 * non-NULL) at the cursor.  When the current page fills up, crc it and map
 * the next page.  Returns -ENOSPC if no page is mapped.
 */
static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	/* Still room for another entry on this page. */
	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}
504
/*
 * Write a full page-sized bitmap.  Bitmaps are page-aligned: if the cursor
 * is mid-page, finish (crc) that page and move to the next.  Returns
 * -ENOSPC when no page is mapped or none remain.
 */
static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	/* Advance to the next page, if any, for further writes. */
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}
527
/*
 * Finish a write: crc the partially-written current page (or just unmap it
 * if untouched), then zero-fill and crc every remaining page so the whole
 * cache file is well-formed.
 */
static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}
544
/*
 * Read one free space entry at the cursor into @entry/@type, mapping (and
 * crc-verifying) the next page first when nothing is mapped.  Unmaps the
 * page once it has no room for another full entry.  Returns 0 or the crc
 * check error.
 */
static int io_ctl_read_entry(struct io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	/* Page exhausted; next read will map/verify the following page. */
	io_ctl_unmap_page(io_ctl);

	return 0;
}
571
/*
 * Read one page-sized bitmap from the next page (crc-verified) into
 * entry->bitmap, which the caller must have allocated.  Returns 0 or the
 * crc check error.
 */
static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
	io_ctl_unmap_page(io_ctl);

	return 0;
}
586
/*
 * Load the on-disk free space cache for the header at @offset into @ctl.
 *
 * Returns 1 if the cache was loaded, 0 if the cache should simply be
 * ignored (missing, stale generation, empty, or corrupt — the partially
 * built ctl is torn down first), or a negative errno on setup failure.
 */
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	struct list_head bitmaps;	/* bitmap entries awaiting their payload */
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	/* A stale generation means the cache wasn't written last transaction. */
	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu)\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return ret;

	ret = readahead_cache(inode);
	if (ret)
		goto out;

	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e)
			goto free_cache;

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		/* A zero-length entry means the cache is corrupt. */
		if (!e->bytes) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			BUG_ON(!num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
			if (!e->bitmap) {
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			/* Payload pages follow all entries; read them later. */
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * We add the bitmaps at the end of the entries in order that
	 * the bitmap entries are added to the cache.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	/* Entries already linked into ctl must be torn down too. */
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}
738
/*
 * Load @block_group's free space cache from disk, if trustworthy.
 *
 * Returns 1 when the cache was loaded and its totals match the block group
 * item, 0 otherwise (callers then fall back to scanning).  A bogus cache is
 * sanity-checked against the block group's used/super bytes, dumped, and
 * marked BTRFS_DC_CLEAR so it gets rewritten.
 */
int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	/* Cross-check the loaded free space against the block group item. */
	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has an wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		/* Hard errors degrade to "no cache" for the caller. */
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}
819
Josef Bacikc09544e2011-08-30 10:19:10 -0400820/**
821 * __btrfs_write_out_cache - write out cached info to an inode
822 * @root - the root the inode belongs to
823 * @ctl - the free space cache we are going to write out
824 * @block_group - the block_group for this cache if it belongs to a block_group
825 * @trans - the trans handle
826 * @path - the path to use
827 * @offset - the offset for the key we'll insert
828 *
829 * This function writes out a free space cache struct to disk for quick recovery
 830 * on mount. This will return 0 if it was successful in writing the cache out,
831 * and -1 if it was not.
832 */
Li Zefan0414efa2011-04-20 10:20:14 +0800833int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
834 struct btrfs_free_space_ctl *ctl,
835 struct btrfs_block_group_cache *block_group,
836 struct btrfs_trans_handle *trans,
837 struct btrfs_path *path, u64 offset)
Josef Bacik0cb59c92010-07-02 12:14:14 -0400838{
839 struct btrfs_free_space_header *header;
840 struct extent_buffer *leaf;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400841 struct rb_node *node;
842 struct list_head *pos, *n;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400843 struct extent_state *cached_state = NULL;
Josef Bacik43be2142011-04-01 14:55:00 +0000844 struct btrfs_free_cluster *cluster = NULL;
845 struct extent_io_tree *unpin = NULL;
Josef Bacika67509c2011-10-05 15:18:58 -0400846 struct io_ctl io_ctl;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400847 struct list_head bitmap_list;
848 struct btrfs_key key;
Li Zefandb804f22012-01-10 16:41:01 +0800849 u64 start, extent_start, extent_end, len;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400850 int entries = 0;
851 int bitmaps = 0;
Josef Bacikc09544e2011-08-30 10:19:10 -0400852 int ret;
853 int err = -1;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400854
Josef Bacik0cb59c92010-07-02 12:14:14 -0400855 INIT_LIST_HEAD(&bitmap_list);
856
Li Zefan0414efa2011-04-20 10:20:14 +0800857 if (!i_size_read(inode))
858 return -1;
Josef Bacik2b209822010-12-03 13:17:53 -0500859
Li Zefan706efc62012-01-09 14:36:28 +0800860 ret = io_ctl_init(&io_ctl, inode, root);
861 if (ret)
862 return -1;
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400863
Josef Bacik43be2142011-04-01 14:55:00 +0000864 /* Get the cluster for this block_group if it exists */
Li Zefan0414efa2011-04-20 10:20:14 +0800865 if (block_group && !list_empty(&block_group->cluster_list))
Josef Bacik43be2142011-04-01 14:55:00 +0000866 cluster = list_entry(block_group->cluster_list.next,
867 struct btrfs_free_cluster,
868 block_group_list);
869
Josef Bacika67509c2011-10-05 15:18:58 -0400870 /* Lock all pages first so we can lock the extent safely. */
871 io_ctl_prepare_pages(&io_ctl, inode, 0);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400872
Josef Bacik0cb59c92010-07-02 12:14:14 -0400873 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
Jeff Mahoneyd0082372012-03-01 14:57:19 +0100874 0, &cached_state);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400875
Josef Bacikf75b1302011-10-05 10:00:18 -0400876 node = rb_first(&ctl->free_space_offset);
877 if (!node && cluster) {
878 node = rb_first(&cluster->root);
879 cluster = NULL;
880 }
881
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400882 /* Make sure we can fit our crcs into the first page */
883 if (io_ctl.check_crcs &&
884 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
885 WARN_ON(1);
886 goto out_nospc;
887 }
888
Josef Bacika67509c2011-10-05 15:18:58 -0400889 io_ctl_set_generation(&io_ctl, trans->transid);
890
Josef Bacik0cb59c92010-07-02 12:14:14 -0400891 /* Write out the extent entries */
Josef Bacika67509c2011-10-05 15:18:58 -0400892 while (node) {
893 struct btrfs_free_space *e;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400894
Josef Bacika67509c2011-10-05 15:18:58 -0400895 e = rb_entry(node, struct btrfs_free_space, offset_index);
896 entries++;
Josef Bacik43be2142011-04-01 14:55:00 +0000897
Josef Bacika67509c2011-10-05 15:18:58 -0400898 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
899 e->bitmap);
900 if (ret)
901 goto out_nospc;
902
903 if (e->bitmap) {
904 list_add_tail(&e->list, &bitmap_list);
905 bitmaps++;
906 }
907 node = rb_next(node);
908 if (!node && cluster) {
909 node = rb_first(&cluster->root);
910 cluster = NULL;
911 }
912 }
913
914 /*
915 * We want to add any pinned extents to our free space cache
916 * so we don't leak the space
917 */
Li Zefandb804f22012-01-10 16:41:01 +0800918
919 /*
920 * We shouldn't have switched the pinned extents yet so this is the
921 * right one
922 */
923 unpin = root->fs_info->pinned_extents;
924
925 if (block_group)
926 start = block_group->key.objectid;
927
Josef Bacika67509c2011-10-05 15:18:58 -0400928 while (block_group && (start < block_group->key.objectid +
929 block_group->key.offset)) {
Li Zefandb804f22012-01-10 16:41:01 +0800930 ret = find_first_extent_bit(unpin, start,
931 &extent_start, &extent_end,
Josef Bacika67509c2011-10-05 15:18:58 -0400932 EXTENT_DIRTY);
933 if (ret) {
934 ret = 0;
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400935 break;
936 }
937
Josef Bacika67509c2011-10-05 15:18:58 -0400938 /* This pinned extent is out of our range */
Li Zefandb804f22012-01-10 16:41:01 +0800939 if (extent_start >= block_group->key.objectid +
Josef Bacika67509c2011-10-05 15:18:58 -0400940 block_group->key.offset)
941 break;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400942
Li Zefandb804f22012-01-10 16:41:01 +0800943 extent_start = max(extent_start, start);
944 extent_end = min(block_group->key.objectid +
945 block_group->key.offset, extent_end + 1);
946 len = extent_end - extent_start;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400947
Josef Bacika67509c2011-10-05 15:18:58 -0400948 entries++;
Li Zefandb804f22012-01-10 16:41:01 +0800949 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
Josef Bacika67509c2011-10-05 15:18:58 -0400950 if (ret)
951 goto out_nospc;
Josef Bacik2f356122011-06-10 15:31:13 -0400952
Li Zefandb804f22012-01-10 16:41:01 +0800953 start = extent_end;
Josef Bacika67509c2011-10-05 15:18:58 -0400954 }
Josef Bacik0cb59c92010-07-02 12:14:14 -0400955
956 /* Write out the bitmaps */
957 list_for_each_safe(pos, n, &bitmap_list) {
Josef Bacik0cb59c92010-07-02 12:14:14 -0400958 struct btrfs_free_space *entry =
959 list_entry(pos, struct btrfs_free_space, list);
960
Josef Bacika67509c2011-10-05 15:18:58 -0400961 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
962 if (ret)
963 goto out_nospc;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400964 list_del_init(&entry->list);
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400965 }
966
Josef Bacik0cb59c92010-07-02 12:14:14 -0400967 /* Zero out the rest of the pages just to make sure */
Josef Bacika67509c2011-10-05 15:18:58 -0400968 io_ctl_zero_remaining_pages(&io_ctl);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400969
Josef Bacika67509c2011-10-05 15:18:58 -0400970 ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
971 0, i_size_read(inode), &cached_state);
972 io_ctl_drop_pages(&io_ctl);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400973 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
974 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
975
Josef Bacikc09544e2011-08-30 10:19:10 -0400976 if (ret)
Josef Bacik2f356122011-06-10 15:31:13 -0400977 goto out;
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400978
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400979
Josef Bacik549b4fd2011-10-05 16:33:53 -0400980 ret = filemap_write_and_wait(inode->i_mapping);
981 if (ret)
982 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400983
984 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
Li Zefan0414efa2011-04-20 10:20:14 +0800985 key.offset = offset;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400986 key.type = 0;
987
Josef Bacika9b5fcd2011-08-19 12:06:12 -0400988 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400989 if (ret < 0) {
Josef Bacika67509c2011-10-05 15:18:58 -0400990 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
Josef Bacik5b0e95b2011-10-06 08:58:24 -0400991 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
992 GFP_NOFS);
Josef Bacik2f356122011-06-10 15:31:13 -0400993 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400994 }
995 leaf = path->nodes[0];
996 if (ret > 0) {
997 struct btrfs_key found_key;
998 BUG_ON(!path->slots[0]);
999 path->slots[0]--;
1000 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1001 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
Li Zefan0414efa2011-04-20 10:20:14 +08001002 found_key.offset != offset) {
Josef Bacika67509c2011-10-05 15:18:58 -04001003 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1004 inode->i_size - 1,
Josef Bacik5b0e95b2011-10-06 08:58:24 -04001005 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1006 NULL, GFP_NOFS);
David Sterbab3b4aa72011-04-21 01:20:15 +02001007 btrfs_release_path(path);
Josef Bacik2f356122011-06-10 15:31:13 -04001008 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -04001009 }
1010 }
Josef Bacik549b4fd2011-10-05 16:33:53 -04001011
1012 BTRFS_I(inode)->generation = trans->transid;
Josef Bacik0cb59c92010-07-02 12:14:14 -04001013 header = btrfs_item_ptr(leaf, path->slots[0],
1014 struct btrfs_free_space_header);
1015 btrfs_set_free_space_entries(leaf, header, entries);
1016 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1017 btrfs_set_free_space_generation(leaf, header, trans->transid);
1018 btrfs_mark_buffer_dirty(leaf);
David Sterbab3b4aa72011-04-21 01:20:15 +02001019 btrfs_release_path(path);
Josef Bacik0cb59c92010-07-02 12:14:14 -04001020
Josef Bacikc09544e2011-08-30 10:19:10 -04001021 err = 0;
Josef Bacik2f356122011-06-10 15:31:13 -04001022out:
Josef Bacika67509c2011-10-05 15:18:58 -04001023 io_ctl_free(&io_ctl);
Josef Bacikc09544e2011-08-30 10:19:10 -04001024 if (err) {
Josef Bacika67509c2011-10-05 15:18:58 -04001025 invalidate_inode_pages2(inode->i_mapping);
Josef Bacik0cb59c92010-07-02 12:14:14 -04001026 BTRFS_I(inode)->generation = 0;
1027 }
Josef Bacik0cb59c92010-07-02 12:14:14 -04001028 btrfs_update_inode(trans, root, inode);
Josef Bacikc09544e2011-08-30 10:19:10 -04001029 return err;
Josef Bacika67509c2011-10-05 15:18:58 -04001030
1031out_nospc:
1032 list_for_each_safe(pos, n, &bitmap_list) {
1033 struct btrfs_free_space *entry =
1034 list_entry(pos, struct btrfs_free_space, list);
1035 list_del_init(&entry->list);
1036 }
1037 io_ctl_drop_pages(&io_ctl);
1038 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1039 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1040 goto out;
Li Zefan0414efa2011-04-20 10:20:14 +08001041}
1042
1043int btrfs_write_out_cache(struct btrfs_root *root,
1044 struct btrfs_trans_handle *trans,
1045 struct btrfs_block_group_cache *block_group,
1046 struct btrfs_path *path)
1047{
1048 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1049 struct inode *inode;
1050 int ret = 0;
1051
1052 root = root->fs_info->tree_root;
1053
1054 spin_lock(&block_group->lock);
1055 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1056 spin_unlock(&block_group->lock);
1057 return 0;
1058 }
1059 spin_unlock(&block_group->lock);
1060
1061 inode = lookup_free_space_inode(root, block_group, path);
1062 if (IS_ERR(inode))
1063 return 0;
1064
1065 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1066 path, block_group->key.objectid);
Josef Bacikc09544e2011-08-30 10:19:10 -04001067 if (ret) {
Li Zefan0414efa2011-04-20 10:20:14 +08001068 spin_lock(&block_group->lock);
1069 block_group->disk_cache_state = BTRFS_DC_ERROR;
1070 spin_unlock(&block_group->lock);
Li Zefan82d59022011-04-20 10:33:24 +08001071 ret = 0;
Josef Bacikc09544e2011-08-30 10:19:10 -04001072#ifdef DEBUG
Li Zefan0414efa2011-04-20 10:20:14 +08001073 printk(KERN_ERR "btrfs: failed to write free space cace "
1074 "for block group %llu\n", block_group->key.objectid);
Josef Bacikc09544e2011-08-30 10:19:10 -04001075#endif
Li Zefan0414efa2011-04-20 10:20:14 +08001076 }
1077
Josef Bacik0cb59c92010-07-02 12:14:14 -04001078 iput(inode);
1079 return ret;
1080}
1081
Li Zefan34d52cb2011-03-29 13:46:06 +08001082static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
Josef Bacik96303082009-07-13 21:29:25 -04001083 u64 offset)
1084{
1085 BUG_ON(offset < bitmap_start);
1086 offset -= bitmap_start;
Li Zefan34d52cb2011-03-29 13:46:06 +08001087 return (unsigned long)(div_u64(offset, unit));
Josef Bacik96303082009-07-13 21:29:25 -04001088}
1089
Li Zefan34d52cb2011-03-29 13:46:06 +08001090static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
Josef Bacik96303082009-07-13 21:29:25 -04001091{
Li Zefan34d52cb2011-03-29 13:46:06 +08001092 return (unsigned long)(div_u64(bytes, unit));
Josef Bacik96303082009-07-13 21:29:25 -04001093}
1094
Li Zefan34d52cb2011-03-29 13:46:06 +08001095static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001096 u64 offset)
1097{
1098 u64 bitmap_start;
1099 u64 bytes_per_bitmap;
1100
Li Zefan34d52cb2011-03-29 13:46:06 +08001101 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1102 bitmap_start = offset - ctl->start;
Josef Bacik96303082009-07-13 21:29:25 -04001103 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1104 bitmap_start *= bytes_per_bitmap;
Li Zefan34d52cb2011-03-29 13:46:06 +08001105 bitmap_start += ctl->start;
Josef Bacik96303082009-07-13 21:29:25 -04001106
1107 return bitmap_start;
1108}
Josef Bacik0f9dd462008-09-23 13:14:11 -04001109
1110static int tree_insert_offset(struct rb_root *root, u64 offset,
Josef Bacik96303082009-07-13 21:29:25 -04001111 struct rb_node *node, int bitmap)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001112{
1113 struct rb_node **p = &root->rb_node;
1114 struct rb_node *parent = NULL;
1115 struct btrfs_free_space *info;
1116
1117 while (*p) {
1118 parent = *p;
1119 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1120
Josef Bacik96303082009-07-13 21:29:25 -04001121 if (offset < info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001122 p = &(*p)->rb_left;
Josef Bacik96303082009-07-13 21:29:25 -04001123 } else if (offset > info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001124 p = &(*p)->rb_right;
Josef Bacik96303082009-07-13 21:29:25 -04001125 } else {
1126 /*
1127 * we could have a bitmap entry and an extent entry
1128 * share the same offset. If this is the case, we want
1129 * the extent entry to always be found first if we do a
1130 * linear search through the tree, since we want to have
1131 * the quickest allocation time, and allocating from an
1132 * extent is faster than allocating from a bitmap. So
1133 * if we're inserting a bitmap and we find an entry at
1134 * this offset, we want to go right, or after this entry
1135 * logically. If we are inserting an extent and we've
1136 * found a bitmap, we want to go left, or before
1137 * logically.
1138 */
1139 if (bitmap) {
Josef Bacik207dde82011-05-13 14:49:23 -04001140 if (info->bitmap) {
1141 WARN_ON_ONCE(1);
1142 return -EEXIST;
1143 }
Josef Bacik96303082009-07-13 21:29:25 -04001144 p = &(*p)->rb_right;
1145 } else {
Josef Bacik207dde82011-05-13 14:49:23 -04001146 if (!info->bitmap) {
1147 WARN_ON_ONCE(1);
1148 return -EEXIST;
1149 }
Josef Bacik96303082009-07-13 21:29:25 -04001150 p = &(*p)->rb_left;
1151 }
1152 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04001153 }
1154
1155 rb_link_node(node, parent, p);
1156 rb_insert_color(node, root);
1157
1158 return 0;
1159}
1160
1161/*
Josef Bacik70cb0742009-04-03 10:14:19 -04001162 * searches the tree for the given offset.
1163 *
Josef Bacik96303082009-07-13 21:29:25 -04001164 * fuzzy - If this is set, then we are trying to make an allocation, and we just
1165 * want a section that has at least bytes size and comes at or after the given
1166 * offset.
Josef Bacik0f9dd462008-09-23 13:14:11 -04001167 */
Josef Bacik96303082009-07-13 21:29:25 -04001168static struct btrfs_free_space *
Li Zefan34d52cb2011-03-29 13:46:06 +08001169tree_search_offset(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001170 u64 offset, int bitmap_only, int fuzzy)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001171{
Li Zefan34d52cb2011-03-29 13:46:06 +08001172 struct rb_node *n = ctl->free_space_offset.rb_node;
Josef Bacik96303082009-07-13 21:29:25 -04001173 struct btrfs_free_space *entry, *prev = NULL;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001174
Josef Bacik96303082009-07-13 21:29:25 -04001175 /* find entry that is closest to the 'offset' */
1176 while (1) {
1177 if (!n) {
1178 entry = NULL;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001179 break;
1180 }
Josef Bacik96303082009-07-13 21:29:25 -04001181
1182 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1183 prev = entry;
1184
1185 if (offset < entry->offset)
1186 n = n->rb_left;
1187 else if (offset > entry->offset)
1188 n = n->rb_right;
1189 else
1190 break;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001191 }
1192
Josef Bacik96303082009-07-13 21:29:25 -04001193 if (bitmap_only) {
1194 if (!entry)
1195 return NULL;
1196 if (entry->bitmap)
1197 return entry;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001198
Josef Bacik96303082009-07-13 21:29:25 -04001199 /*
1200 * bitmap entry and extent entry may share same offset,
1201 * in that case, bitmap entry comes after extent entry.
1202 */
1203 n = rb_next(n);
1204 if (!n)
1205 return NULL;
1206 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1207 if (entry->offset != offset)
1208 return NULL;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001209
Josef Bacik96303082009-07-13 21:29:25 -04001210 WARN_ON(!entry->bitmap);
1211 return entry;
1212 } else if (entry) {
1213 if (entry->bitmap) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001214 /*
Josef Bacik96303082009-07-13 21:29:25 -04001215 * if previous extent entry covers the offset,
1216 * we should return it instead of the bitmap entry
Josef Bacik0f9dd462008-09-23 13:14:11 -04001217 */
Josef Bacik96303082009-07-13 21:29:25 -04001218 n = &entry->offset_index;
1219 while (1) {
1220 n = rb_prev(n);
1221 if (!n)
1222 break;
1223 prev = rb_entry(n, struct btrfs_free_space,
1224 offset_index);
1225 if (!prev->bitmap) {
1226 if (prev->offset + prev->bytes > offset)
1227 entry = prev;
1228 break;
1229 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04001230 }
Josef Bacik96303082009-07-13 21:29:25 -04001231 }
1232 return entry;
1233 }
1234
1235 if (!prev)
1236 return NULL;
1237
1238 /* find last entry before the 'offset' */
1239 entry = prev;
1240 if (entry->offset > offset) {
1241 n = rb_prev(&entry->offset_index);
1242 if (n) {
1243 entry = rb_entry(n, struct btrfs_free_space,
1244 offset_index);
1245 BUG_ON(entry->offset > offset);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001246 } else {
Josef Bacik96303082009-07-13 21:29:25 -04001247 if (fuzzy)
1248 return entry;
1249 else
1250 return NULL;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001251 }
1252 }
1253
Josef Bacik96303082009-07-13 21:29:25 -04001254 if (entry->bitmap) {
1255 n = &entry->offset_index;
1256 while (1) {
1257 n = rb_prev(n);
1258 if (!n)
1259 break;
1260 prev = rb_entry(n, struct btrfs_free_space,
1261 offset_index);
1262 if (!prev->bitmap) {
1263 if (prev->offset + prev->bytes > offset)
1264 return prev;
1265 break;
1266 }
1267 }
Li Zefan34d52cb2011-03-29 13:46:06 +08001268 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
Josef Bacik96303082009-07-13 21:29:25 -04001269 return entry;
1270 } else if (entry->offset + entry->bytes > offset)
1271 return entry;
1272
1273 if (!fuzzy)
1274 return NULL;
1275
1276 while (1) {
1277 if (entry->bitmap) {
1278 if (entry->offset + BITS_PER_BITMAP *
Li Zefan34d52cb2011-03-29 13:46:06 +08001279 ctl->unit > offset)
Josef Bacik96303082009-07-13 21:29:25 -04001280 break;
1281 } else {
1282 if (entry->offset + entry->bytes > offset)
1283 break;
1284 }
1285
1286 n = rb_next(&entry->offset_index);
1287 if (!n)
1288 return NULL;
1289 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1290 }
1291 return entry;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001292}
1293
Li Zefanf333adb2010-11-09 14:57:39 +08001294static inline void
Li Zefan34d52cb2011-03-29 13:46:06 +08001295__unlink_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001296 struct btrfs_free_space *info)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001297{
Li Zefan34d52cb2011-03-29 13:46:06 +08001298 rb_erase(&info->offset_index, &ctl->free_space_offset);
1299 ctl->free_extents--;
Li Zefanf333adb2010-11-09 14:57:39 +08001300}
1301
Li Zefan34d52cb2011-03-29 13:46:06 +08001302static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001303 struct btrfs_free_space *info)
1304{
Li Zefan34d52cb2011-03-29 13:46:06 +08001305 __unlink_free_space(ctl, info);
1306 ctl->free_space -= info->bytes;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001307}
1308
Li Zefan34d52cb2011-03-29 13:46:06 +08001309static int link_free_space(struct btrfs_free_space_ctl *ctl,
Josef Bacik0f9dd462008-09-23 13:14:11 -04001310 struct btrfs_free_space *info)
1311{
1312 int ret = 0;
1313
Josef Bacik96303082009-07-13 21:29:25 -04001314 BUG_ON(!info->bitmap && !info->bytes);
Li Zefan34d52cb2011-03-29 13:46:06 +08001315 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
Josef Bacik96303082009-07-13 21:29:25 -04001316 &info->offset_index, (info->bitmap != NULL));
Josef Bacik0f9dd462008-09-23 13:14:11 -04001317 if (ret)
1318 return ret;
1319
Li Zefan34d52cb2011-03-29 13:46:06 +08001320 ctl->free_space += info->bytes;
1321 ctl->free_extents++;
Josef Bacik96303082009-07-13 21:29:25 -04001322 return ret;
1323}
1324
Li Zefan34d52cb2011-03-29 13:46:06 +08001325static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
Josef Bacik96303082009-07-13 21:29:25 -04001326{
Li Zefan34d52cb2011-03-29 13:46:06 +08001327 struct btrfs_block_group_cache *block_group = ctl->private;
Josef Bacik25891f72009-09-11 16:11:20 -04001328 u64 max_bytes;
1329 u64 bitmap_bytes;
1330 u64 extent_bytes;
Li Zefan8eb2d822010-11-09 14:48:01 +08001331 u64 size = block_group->key.offset;
Li Zefan34d52cb2011-03-29 13:46:06 +08001332 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1333 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1334
1335 BUG_ON(ctl->total_bitmaps > max_bitmaps);
Josef Bacik96303082009-07-13 21:29:25 -04001336
1337 /*
1338 * The goal is to keep the total amount of memory used per 1gb of space
1339 * at or below 32k, so we need to adjust how much memory we allow to be
1340 * used by extent based free space tracking
1341 */
Li Zefan8eb2d822010-11-09 14:48:01 +08001342 if (size < 1024 * 1024 * 1024)
1343 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1344 else
1345 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1346 div64_u64(size, 1024 * 1024 * 1024);
Josef Bacik96303082009-07-13 21:29:25 -04001347
Josef Bacik25891f72009-09-11 16:11:20 -04001348 /*
1349 * we want to account for 1 more bitmap than what we have so we can make
1350 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1351 * we add more bitmaps.
1352 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001353 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
Josef Bacik96303082009-07-13 21:29:25 -04001354
Josef Bacik25891f72009-09-11 16:11:20 -04001355 if (bitmap_bytes >= max_bytes) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001356 ctl->extents_thresh = 0;
Josef Bacik25891f72009-09-11 16:11:20 -04001357 return;
Josef Bacik96303082009-07-13 21:29:25 -04001358 }
Josef Bacik25891f72009-09-11 16:11:20 -04001359
1360 /*
1361 * we want the extent entry threshold to always be at most 1/2 the maxw
1362 * bytes we can have, or whatever is less than that.
1363 */
1364 extent_bytes = max_bytes - bitmap_bytes;
1365 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1366
Li Zefan34d52cb2011-03-29 13:46:06 +08001367 ctl->extents_thresh =
Josef Bacik25891f72009-09-11 16:11:20 -04001368 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
Josef Bacik96303082009-07-13 21:29:25 -04001369}
1370
Miao Xiebb3ac5a2011-08-05 09:32:35 +00001371static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1372 struct btrfs_free_space *info,
1373 u64 offset, u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001374{
Li Zefanf38b6e72011-03-14 13:40:51 +08001375 unsigned long start, count;
Josef Bacik96303082009-07-13 21:29:25 -04001376
Li Zefan34d52cb2011-03-29 13:46:06 +08001377 start = offset_to_bit(info->offset, ctl->unit, offset);
1378 count = bytes_to_bits(bytes, ctl->unit);
Li Zefanf38b6e72011-03-14 13:40:51 +08001379 BUG_ON(start + count > BITS_PER_BITMAP);
Josef Bacik96303082009-07-13 21:29:25 -04001380
Li Zefanf38b6e72011-03-14 13:40:51 +08001381 bitmap_clear(info->bitmap, start, count);
Josef Bacik96303082009-07-13 21:29:25 -04001382
1383 info->bytes -= bytes;
Miao Xiebb3ac5a2011-08-05 09:32:35 +00001384}
1385
/*
 * Clear @bytes starting at @offset from @info's bitmap and also subtract
 * them from the ctl-wide free space counter.
 */
static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}
1393
Li Zefan34d52cb2011-03-29 13:46:06 +08001394static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
Josef Bacik817d52f2009-07-13 21:29:25 -04001395 struct btrfs_free_space *info, u64 offset,
1396 u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001397{
Li Zefanf38b6e72011-03-14 13:40:51 +08001398 unsigned long start, count;
Josef Bacik96303082009-07-13 21:29:25 -04001399
Li Zefan34d52cb2011-03-29 13:46:06 +08001400 start = offset_to_bit(info->offset, ctl->unit, offset);
1401 count = bytes_to_bits(bytes, ctl->unit);
Li Zefanf38b6e72011-03-14 13:40:51 +08001402 BUG_ON(start + count > BITS_PER_BITMAP);
Josef Bacik96303082009-07-13 21:29:25 -04001403
Li Zefanf38b6e72011-03-14 13:40:51 +08001404 bitmap_set(info->bitmap, start, count);
Josef Bacik96303082009-07-13 21:29:25 -04001405
1406 info->bytes += bytes;
Li Zefan34d52cb2011-03-29 13:46:06 +08001407 ctl->free_space += bytes;
Josef Bacik96303082009-07-13 21:29:25 -04001408}
1409
Li Zefan34d52cb2011-03-29 13:46:06 +08001410static int search_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001411 struct btrfs_free_space *bitmap_info, u64 *offset,
1412 u64 *bytes)
1413{
1414 unsigned long found_bits = 0;
1415 unsigned long bits, i;
1416 unsigned long next_zero;
1417
Li Zefan34d52cb2011-03-29 13:46:06 +08001418 i = offset_to_bit(bitmap_info->offset, ctl->unit,
Josef Bacik96303082009-07-13 21:29:25 -04001419 max_t(u64, *offset, bitmap_info->offset));
Li Zefan34d52cb2011-03-29 13:46:06 +08001420 bits = bytes_to_bits(*bytes, ctl->unit);
Josef Bacik96303082009-07-13 21:29:25 -04001421
1422 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1423 i < BITS_PER_BITMAP;
1424 i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1425 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1426 BITS_PER_BITMAP, i);
1427 if ((next_zero - i) >= bits) {
1428 found_bits = next_zero - i;
1429 break;
1430 }
1431 i = next_zero;
1432 }
1433
1434 if (found_bits) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001435 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1436 *bytes = (u64)(found_bits) * ctl->unit;
Josef Bacik96303082009-07-13 21:29:25 -04001437 return 0;
1438 }
1439
1440 return -1;
1441}
1442
Li Zefan34d52cb2011-03-29 13:46:06 +08001443static struct btrfs_free_space *
1444find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001445{
1446 struct btrfs_free_space *entry;
1447 struct rb_node *node;
1448 int ret;
1449
Li Zefan34d52cb2011-03-29 13:46:06 +08001450 if (!ctl->free_space_offset.rb_node)
Josef Bacik96303082009-07-13 21:29:25 -04001451 return NULL;
1452
Li Zefan34d52cb2011-03-29 13:46:06 +08001453 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
Josef Bacik96303082009-07-13 21:29:25 -04001454 if (!entry)
1455 return NULL;
1456
1457 for (node = &entry->offset_index; node; node = rb_next(node)) {
1458 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1459 if (entry->bytes < *bytes)
1460 continue;
1461
1462 if (entry->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001463 ret = search_bitmap(ctl, entry, offset, bytes);
Josef Bacik96303082009-07-13 21:29:25 -04001464 if (!ret)
1465 return entry;
1466 continue;
1467 }
1468
1469 *offset = entry->offset;
1470 *bytes = entry->bytes;
1471 return entry;
1472 }
1473
1474 return NULL;
1475}
1476
Li Zefan34d52cb2011-03-29 13:46:06 +08001477static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001478 struct btrfs_free_space *info, u64 offset)
1479{
Li Zefan34d52cb2011-03-29 13:46:06 +08001480 info->offset = offset_to_bitmap(ctl, offset);
Josef Bacikf019f422009-09-11 16:11:20 -04001481 info->bytes = 0;
Alexandre Olivaf2d0f672011-11-28 12:04:43 -02001482 INIT_LIST_HEAD(&info->list);
Li Zefan34d52cb2011-03-29 13:46:06 +08001483 link_free_space(ctl, info);
1484 ctl->total_bitmaps++;
Josef Bacik96303082009-07-13 21:29:25 -04001485
Li Zefan34d52cb2011-03-29 13:46:06 +08001486 ctl->op->recalc_thresholds(ctl);
Josef Bacik96303082009-07-13 21:29:25 -04001487}
1488
Li Zefan34d52cb2011-03-29 13:46:06 +08001489static void free_bitmap(struct btrfs_free_space_ctl *ctl,
Li Zefanedf6e2d2010-11-09 14:50:07 +08001490 struct btrfs_free_space *bitmap_info)
1491{
Li Zefan34d52cb2011-03-29 13:46:06 +08001492 unlink_free_space(ctl, bitmap_info);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001493 kfree(bitmap_info->bitmap);
Josef Bacikdc89e982011-01-28 17:05:48 -05001494 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
Li Zefan34d52cb2011-03-29 13:46:06 +08001495 ctl->total_bitmaps--;
1496 ctl->op->recalc_thresholds(ctl);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001497}
1498
Li Zefan34d52cb2011-03-29 13:46:06 +08001499static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001500 struct btrfs_free_space *bitmap_info,
1501 u64 *offset, u64 *bytes)
1502{
1503 u64 end;
Josef Bacik6606bb92009-07-31 11:03:58 -04001504 u64 search_start, search_bytes;
1505 int ret;
Josef Bacik96303082009-07-13 21:29:25 -04001506
1507again:
Li Zefan34d52cb2011-03-29 13:46:06 +08001508 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
Josef Bacik96303082009-07-13 21:29:25 -04001509
Josef Bacik6606bb92009-07-31 11:03:58 -04001510 /*
1511 * XXX - this can go away after a few releases.
1512 *
1513 * since the only user of btrfs_remove_free_space is the tree logging
1514 * stuff, and the only way to test that is under crash conditions, we
1515 * want to have this debug stuff here just in case somethings not
1516 * working. Search the bitmap for the space we are trying to use to
1517 * make sure its actually there. If its not there then we need to stop
1518 * because something has gone wrong.
1519 */
1520 search_start = *offset;
1521 search_bytes = *bytes;
Josef Bacik13dbc082011-02-03 02:39:52 +00001522 search_bytes = min(search_bytes, end - search_start + 1);
Li Zefan34d52cb2011-03-29 13:46:06 +08001523 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
Josef Bacik6606bb92009-07-31 11:03:58 -04001524 BUG_ON(ret < 0 || search_start != *offset);
1525
Josef Bacik96303082009-07-13 21:29:25 -04001526 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001527 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
Josef Bacik96303082009-07-13 21:29:25 -04001528 *bytes -= end - *offset + 1;
1529 *offset = end + 1;
1530 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001531 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
Josef Bacik96303082009-07-13 21:29:25 -04001532 *bytes = 0;
1533 }
1534
1535 if (*bytes) {
Josef Bacik6606bb92009-07-31 11:03:58 -04001536 struct rb_node *next = rb_next(&bitmap_info->offset_index);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001537 if (!bitmap_info->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08001538 free_bitmap(ctl, bitmap_info);
Josef Bacik96303082009-07-13 21:29:25 -04001539
Josef Bacik6606bb92009-07-31 11:03:58 -04001540 /*
1541 * no entry after this bitmap, but we still have bytes to
1542 * remove, so something has gone wrong.
1543 */
1544 if (!next)
Josef Bacik96303082009-07-13 21:29:25 -04001545 return -EINVAL;
1546
Josef Bacik6606bb92009-07-31 11:03:58 -04001547 bitmap_info = rb_entry(next, struct btrfs_free_space,
1548 offset_index);
1549
1550 /*
1551 * if the next entry isn't a bitmap we need to return to let the
1552 * extent stuff do its work.
1553 */
Josef Bacik96303082009-07-13 21:29:25 -04001554 if (!bitmap_info->bitmap)
1555 return -EAGAIN;
1556
Josef Bacik6606bb92009-07-31 11:03:58 -04001557 /*
1558 * Ok the next item is a bitmap, but it may not actually hold
1559 * the information for the rest of this free space stuff, so
1560 * look for it, and if we don't find it return so we can try
1561 * everything over again.
1562 */
1563 search_start = *offset;
1564 search_bytes = *bytes;
Li Zefan34d52cb2011-03-29 13:46:06 +08001565 ret = search_bitmap(ctl, bitmap_info, &search_start,
Josef Bacik6606bb92009-07-31 11:03:58 -04001566 &search_bytes);
1567 if (ret < 0 || search_start != *offset)
1568 return -EAGAIN;
1569
Josef Bacik96303082009-07-13 21:29:25 -04001570 goto again;
Li Zefanedf6e2d2010-11-09 14:50:07 +08001571 } else if (!bitmap_info->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08001572 free_bitmap(ctl, bitmap_info);
Josef Bacik96303082009-07-13 21:29:25 -04001573
1574 return 0;
1575}
1576
Josef Bacik2cdc3422011-05-27 14:07:49 -04001577static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1578 struct btrfs_free_space *info, u64 offset,
1579 u64 bytes)
1580{
1581 u64 bytes_to_set = 0;
1582 u64 end;
1583
1584 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1585
1586 bytes_to_set = min(end - offset, bytes);
1587
1588 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1589
1590 return bytes_to_set;
1591
1592}
1593
Li Zefan34d52cb2011-03-29 13:46:06 +08001594static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1595 struct btrfs_free_space *info)
Josef Bacik96303082009-07-13 21:29:25 -04001596{
Li Zefan34d52cb2011-03-29 13:46:06 +08001597 struct btrfs_block_group_cache *block_group = ctl->private;
Josef Bacik96303082009-07-13 21:29:25 -04001598
1599 /*
1600 * If we are below the extents threshold then we can add this as an
1601 * extent, and don't have to deal with the bitmap
1602 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001603 if (ctl->free_extents < ctl->extents_thresh) {
Josef Bacik32cb0842011-03-18 16:16:21 -04001604 /*
1605 * If this block group has some small extents we don't want to
1606 * use up all of our free slots in the cache with them, we want
1607 * to reserve them to larger extents, however if we have plent
1608 * of cache left then go ahead an dadd them, no sense in adding
1609 * the overhead of a bitmap if we don't have to.
1610 */
1611 if (info->bytes <= block_group->sectorsize * 4) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001612 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1613 return false;
Josef Bacik32cb0842011-03-18 16:16:21 -04001614 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08001615 return false;
Josef Bacik32cb0842011-03-18 16:16:21 -04001616 }
1617 }
Josef Bacik96303082009-07-13 21:29:25 -04001618
1619 /*
1620 * some block groups are so tiny they can't be enveloped by a bitmap, so
1621 * don't even bother to create a bitmap for this
1622 */
1623 if (BITS_PER_BITMAP * block_group->sectorsize >
1624 block_group->key.offset)
Li Zefan34d52cb2011-03-29 13:46:06 +08001625 return false;
1626
1627 return true;
1628}
1629
/*
 * Callbacks for a block-group-backed free space ctl.  insert_into_bitmap()
 * also compares ctl->op against &free_space_op to tell a block group ctl
 * apart from other users of the ctl structure.
 */
static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};
1634
/*
 * Try to store the free range described by 'info' in one or more bitmaps.
 *
 * Called with ctl->tree_lock held; the lock is temporarily dropped when a
 * new bitmap page / info struct has to be allocated.
 *
 * Returns:
 *   1  - the range was fully absorbed into bitmap(s); 'info' (and any
 *        pre-allocated bitmap page it carried) has been consumed or freed.
 *   0  - use_bitmap() said no; caller should link 'info' as an extent.
 *   <0 - allocation failure; 'info' has been freed.
 */
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;	/* set when 'info' itself became the bitmap we fill */
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	/* only block-group ctls participate in cluster handling below */
	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
	 */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		struct btrfs_free_cluster *cluster;
		struct rb_node *node;
		struct btrfs_free_space *entry;

		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
		spin_lock(&cluster->lock);
		node = rb_first(&cluster->root);
		if (!node) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!entry->bitmap) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		/* the cluster's bitmap covers our offset - fill it first */
		if (entry->offset == offset_to_bitmap(ctl, offset)) {
			bytes_added = add_bytes_to_bitmap(ctl, entry,
							  offset, bytes);
			bytes -= bytes_added;
			offset += bytes_added;
		}
		spin_unlock(&cluster->lock);
		if (!bytes) {
			ret = 1;
			goto out;
		}
	}

no_cluster_bitmap:
	/* look for an existing bitmap covering 'offset' in the main tree */
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		/* a bitmap we just added must be findable again */
		BUG_ON(added);
		goto new_bitmap;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
	bytes -= bytes_added;
	offset += bytes_added;
	added = 0;

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;	/* leftover spills into the next bitmap */

new_bitmap:
	if (info && info->bitmap) {
		/* 'info' already carries a bitmap page - promote it */
		add_new_bitmap(ctl, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		/* must allocate; drop the tree lock while we do */
		spin_unlock(&ctl->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kmem_cache_zalloc(btrfs_free_space_cachep,
						 GFP_NOFS);
			if (!info) {
				spin_lock(&ctl->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&ctl->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	/* free whatever pre-allocated pieces were not consumed */
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kmem_cache_free(btrfs_free_space_cachep, info);
	}

	return ret;
}
1750
Chris Mason945d8962011-05-22 12:33:42 -04001751static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001752 struct btrfs_free_space *info, bool update_stat)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001753{
Li Zefan120d66e2010-11-09 14:56:50 +08001754 struct btrfs_free_space *left_info;
1755 struct btrfs_free_space *right_info;
1756 bool merged = false;
1757 u64 offset = info->offset;
1758 u64 bytes = info->bytes;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001759
Josef Bacik0f9dd462008-09-23 13:14:11 -04001760 /*
1761 * first we want to see if there is free space adjacent to the range we
1762 * are adding, if there is remove that struct and add a new one to
1763 * cover the entire range
1764 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001765 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
Josef Bacik96303082009-07-13 21:29:25 -04001766 if (right_info && rb_prev(&right_info->offset_index))
1767 left_info = rb_entry(rb_prev(&right_info->offset_index),
1768 struct btrfs_free_space, offset_index);
1769 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001770 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001771
Josef Bacik96303082009-07-13 21:29:25 -04001772 if (right_info && !right_info->bitmap) {
Li Zefanf333adb2010-11-09 14:57:39 +08001773 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08001774 unlink_free_space(ctl, right_info);
Li Zefanf333adb2010-11-09 14:57:39 +08001775 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001776 __unlink_free_space(ctl, right_info);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001777 info->bytes += right_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05001778 kmem_cache_free(btrfs_free_space_cachep, right_info);
Li Zefan120d66e2010-11-09 14:56:50 +08001779 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001780 }
1781
Josef Bacik96303082009-07-13 21:29:25 -04001782 if (left_info && !left_info->bitmap &&
1783 left_info->offset + left_info->bytes == offset) {
Li Zefanf333adb2010-11-09 14:57:39 +08001784 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08001785 unlink_free_space(ctl, left_info);
Li Zefanf333adb2010-11-09 14:57:39 +08001786 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001787 __unlink_free_space(ctl, left_info);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001788 info->offset = left_info->offset;
1789 info->bytes += left_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05001790 kmem_cache_free(btrfs_free_space_cachep, left_info);
Li Zefan120d66e2010-11-09 14:56:50 +08001791 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001792 }
1793
Li Zefan120d66e2010-11-09 14:56:50 +08001794 return merged;
1795}
1796
Li Zefan581bb052011-04-20 10:06:11 +08001797int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1798 u64 offset, u64 bytes)
Li Zefan120d66e2010-11-09 14:56:50 +08001799{
1800 struct btrfs_free_space *info;
1801 int ret = 0;
1802
Josef Bacikdc89e982011-01-28 17:05:48 -05001803 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
Li Zefan120d66e2010-11-09 14:56:50 +08001804 if (!info)
1805 return -ENOMEM;
1806
1807 info->offset = offset;
1808 info->bytes = bytes;
1809
Li Zefan34d52cb2011-03-29 13:46:06 +08001810 spin_lock(&ctl->tree_lock);
Li Zefan120d66e2010-11-09 14:56:50 +08001811
Li Zefan34d52cb2011-03-29 13:46:06 +08001812 if (try_merge_free_space(ctl, info, true))
Li Zefan120d66e2010-11-09 14:56:50 +08001813 goto link;
1814
1815 /*
1816 * There was no extent directly to the left or right of this new
1817 * extent then we know we're going to have to allocate a new extent, so
1818 * before we do that see if we need to drop this into a bitmap
1819 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001820 ret = insert_into_bitmap(ctl, info);
Li Zefan120d66e2010-11-09 14:56:50 +08001821 if (ret < 0) {
1822 goto out;
1823 } else if (ret) {
1824 ret = 0;
1825 goto out;
1826 }
1827link:
Li Zefan34d52cb2011-03-29 13:46:06 +08001828 ret = link_free_space(ctl, info);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001829 if (ret)
Josef Bacikdc89e982011-01-28 17:05:48 -05001830 kmem_cache_free(btrfs_free_space_cachep, info);
Josef Bacik96303082009-07-13 21:29:25 -04001831out:
Li Zefan34d52cb2011-03-29 13:46:06 +08001832 spin_unlock(&ctl->tree_lock);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001833
Josef Bacik0f9dd462008-09-23 13:14:11 -04001834 if (ret) {
Josef Bacik96303082009-07-13 21:29:25 -04001835 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04001836 BUG_ON(ret == -EEXIST);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001837 }
1838
Josef Bacik0f9dd462008-09-23 13:14:11 -04001839 return ret;
1840}
1841
/*
 * Remove the range [offset, offset + bytes) from the block group's free
 * space cache.  The range may live in an extent entry, a bitmap, or span
 * several of them; bitmap removal can return -EAGAIN to restart the search.
 *
 * Returns 0 on success, -EINVAL if the cached entries don't actually cover
 * the requested range, or an errno from re-linking a split entry.
 */
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&ctl->tree_lock);

again:
	/* exact-match extent lookup first */
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			/* the tree logging code might be calling us before we
			 * have fully loaded the free space rbtree for this
			 * block group.  So it is possible the entry won't
			 * be in the rbtree yet at all.  The caching code
			 * will make sure not to put it in the rbtree if
			 * the logging code has pinned it.
			 */
			goto out_lock;
		}
	}

	/*
	 * The entry found is too small; the remainder must live in the next
	 * entry.  Sanity-check that the next entry really covers us before
	 * switching to it.
	 */
	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
					     struct btrfs_free_space,
					     offset_index);

		/* last byte addressable by the next entry */
		if (next_info->bitmap)
			end = next_info->offset +
			      BITS_PER_BITMAP * ctl->unit - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	/* exact size match: drop the whole entry */
	if (info->bytes == bytes) {
		unlink_free_space(ctl, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			ctl->total_bitmaps--;
		}
		kmem_cache_free(btrfs_free_space_cachep, info);
		ret = 0;
		goto out_lock;
	}

	/* removal from the front of an extent: just shrink it */
	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(ctl, info);
		info->offset += bytes;
		info->bytes -= bytes;
		ret = link_free_space(ctl, info);
		WARN_ON(ret);
		goto out_lock;
	}

	/* removal from the middle (or tail) of an extent */
	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(ctl, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			/* keep the piece after the hole */
			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(ctl, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kmem_cache_free(btrfs_free_space_cachep, info);
		}
		/* drop the lock: btrfs_add_free_space takes it itself */
		spin_unlock(&ctl->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret); /* -ENOMEM */
		goto out;
	}

	/* range lives (at least partly) in a bitmap */
	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret); /* logic error */
out_lock:
	spin_unlock(&ctl->tree_lock);
out:
	return ret;
}
1966
Josef Bacik0f9dd462008-09-23 13:14:11 -04001967void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1968 u64 bytes)
1969{
Li Zefan34d52cb2011-03-29 13:46:06 +08001970 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001971 struct btrfs_free_space *info;
1972 struct rb_node *n;
1973 int count = 0;
1974
Li Zefan34d52cb2011-03-29 13:46:06 +08001975 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001976 info = rb_entry(n, struct btrfs_free_space, offset_index);
1977 if (info->bytes >= bytes)
1978 count++;
Josef Bacik96303082009-07-13 21:29:25 -04001979 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
Joel Becker21380932009-04-21 12:38:29 -07001980 (unsigned long long)info->offset,
Josef Bacik96303082009-07-13 21:29:25 -04001981 (unsigned long long)info->bytes,
1982 (info->bitmap) ? "yes" : "no");
Josef Bacik0f9dd462008-09-23 13:14:11 -04001983 }
Josef Bacik96303082009-07-13 21:29:25 -04001984 printk(KERN_INFO "block group has cluster?: %s\n",
1985 list_empty(&block_group->cluster_list) ? "no" : "yes");
Josef Bacik0f9dd462008-09-23 13:14:11 -04001986 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
1987 "\n", count);
1988}
1989
Li Zefan34d52cb2011-03-29 13:46:06 +08001990void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001991{
Li Zefan34d52cb2011-03-29 13:46:06 +08001992 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001993
Li Zefan34d52cb2011-03-29 13:46:06 +08001994 spin_lock_init(&ctl->tree_lock);
1995 ctl->unit = block_group->sectorsize;
1996 ctl->start = block_group->key.objectid;
1997 ctl->private = block_group;
1998 ctl->op = &free_space_op;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001999
Li Zefan34d52cb2011-03-29 13:46:06 +08002000 /*
2001 * we only want to have 32k of ram per block group for keeping
2002 * track of free space, and if we pass 1/2 of that we want to
2003 * start converting things over to using bitmaps
2004 */
2005 ctl->extents_thresh = ((1024 * 32) / 2) /
2006 sizeof(struct btrfs_free_space);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002007}
2008
Chris Masonfa9c0d792009-04-03 09:47:43 -04002009/*
2010 * for a given cluster, put all of its extents back into the free
2011 * space cache. If the block group passed doesn't match the block group
2012 * pointed to by the cluster, someone else raced in and freed the
2013 * cluster already. In that case, we just return without changing anything
2014 */
2015static int
2016__btrfs_return_cluster_to_free_space(
2017 struct btrfs_block_group_cache *block_group,
2018 struct btrfs_free_cluster *cluster)
2019{
Li Zefan34d52cb2011-03-29 13:46:06 +08002020 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002021 struct btrfs_free_space *entry;
2022 struct rb_node *node;
2023
2024 spin_lock(&cluster->lock);
2025 if (cluster->block_group != block_group)
2026 goto out;
2027
Josef Bacik96303082009-07-13 21:29:25 -04002028 cluster->block_group = NULL;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002029 cluster->window_start = 0;
Josef Bacik96303082009-07-13 21:29:25 -04002030 list_del_init(&cluster->block_group_list);
Josef Bacik96303082009-07-13 21:29:25 -04002031
Chris Masonfa9c0d792009-04-03 09:47:43 -04002032 node = rb_first(&cluster->root);
Josef Bacik96303082009-07-13 21:29:25 -04002033 while (node) {
Josef Bacik4e69b592011-03-21 10:11:24 -04002034 bool bitmap;
2035
Chris Masonfa9c0d792009-04-03 09:47:43 -04002036 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2037 node = rb_next(&entry->offset_index);
2038 rb_erase(&entry->offset_index, &cluster->root);
Josef Bacik4e69b592011-03-21 10:11:24 -04002039
2040 bitmap = (entry->bitmap != NULL);
2041 if (!bitmap)
Li Zefan34d52cb2011-03-29 13:46:06 +08002042 try_merge_free_space(ctl, entry, false);
2043 tree_insert_offset(&ctl->free_space_offset,
Josef Bacik4e69b592011-03-21 10:11:24 -04002044 entry->offset, &entry->offset_index, bitmap);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002045 }
Eric Paris6bef4d32010-02-23 19:43:04 +00002046 cluster->root = RB_ROOT;
Josef Bacik96303082009-07-13 21:29:25 -04002047
Chris Masonfa9c0d792009-04-03 09:47:43 -04002048out:
2049 spin_unlock(&cluster->lock);
Josef Bacik96303082009-07-13 21:29:25 -04002050 btrfs_put_block_group(block_group);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002051 return 0;
2052}
2053
/*
 * Free every entry in the ctl's tree.  Caller holds ctl->tree_lock; the
 * lock is briefly dropped under scheduling pressure so we don't hog the
 * CPU while tearing down a large cache.
 */
void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	/* pop entries off the back of the tree until it is empty */
	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			/* frees the bitmap page and the entry itself */
			free_bitmap(ctl, info);
		}
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
}
2074
/* empty the ctl's free space tree, taking the tree lock around the teardown */
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	spin_lock(&ctl->tree_lock);
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}
2081
Josef Bacik0f9dd462008-09-23 13:14:11 -04002082void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2083{
Li Zefan34d52cb2011-03-29 13:46:06 +08002084 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002085 struct btrfs_free_cluster *cluster;
Josef Bacik96303082009-07-13 21:29:25 -04002086 struct list_head *head;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002087
Li Zefan34d52cb2011-03-29 13:46:06 +08002088 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04002089 while ((head = block_group->cluster_list.next) !=
2090 &block_group->cluster_list) {
2091 cluster = list_entry(head, struct btrfs_free_cluster,
2092 block_group_list);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002093
2094 WARN_ON(cluster->block_group != block_group);
2095 __btrfs_return_cluster_to_free_space(block_group, cluster);
Josef Bacik96303082009-07-13 21:29:25 -04002096 if (need_resched()) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002097 spin_unlock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04002098 cond_resched();
Li Zefan34d52cb2011-03-29 13:46:06 +08002099 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04002100 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04002101 }
Chris Mason09655372011-05-21 09:27:38 -04002102 __btrfs_remove_free_space_cache_locked(ctl);
Li Zefan34d52cb2011-03-29 13:46:06 +08002103 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002104
Josef Bacik0f9dd462008-09-23 13:14:11 -04002105}
2106
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002107u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2108 u64 offset, u64 bytes, u64 empty_size)
Josef Bacik0f9dd462008-09-23 13:14:11 -04002109{
Li Zefan34d52cb2011-03-29 13:46:06 +08002110 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002111 struct btrfs_free_space *entry = NULL;
Josef Bacik96303082009-07-13 21:29:25 -04002112 u64 bytes_search = bytes + empty_size;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002113 u64 ret = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002114
Li Zefan34d52cb2011-03-29 13:46:06 +08002115 spin_lock(&ctl->tree_lock);
2116 entry = find_free_space(ctl, &offset, &bytes_search);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002117 if (!entry)
Josef Bacik96303082009-07-13 21:29:25 -04002118 goto out;
2119
2120 ret = offset;
2121 if (entry->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002122 bitmap_clear_bits(ctl, entry, offset, bytes);
Li Zefanedf6e2d2010-11-09 14:50:07 +08002123 if (!entry->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08002124 free_bitmap(ctl, entry);
Josef Bacik96303082009-07-13 21:29:25 -04002125 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08002126 unlink_free_space(ctl, entry);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002127 entry->offset += bytes;
2128 entry->bytes -= bytes;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002129 if (!entry->bytes)
Josef Bacikdc89e982011-01-28 17:05:48 -05002130 kmem_cache_free(btrfs_free_space_cachep, entry);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002131 else
Li Zefan34d52cb2011-03-29 13:46:06 +08002132 link_free_space(ctl, entry);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002133 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04002134
Josef Bacik96303082009-07-13 21:29:25 -04002135out:
Li Zefan34d52cb2011-03-29 13:46:06 +08002136 spin_unlock(&ctl->tree_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -04002137
Josef Bacik0f9dd462008-09-23 13:14:11 -04002138 return ret;
2139}
Chris Masonfa9c0d792009-04-03 09:47:43 -04002140
2141/*
2142 * given a cluster, put all of its extents back into the free space
2143 * cache. If a block group is passed, this function will only free
2144 * a cluster that belongs to the passed block group.
2145 *
2146 * Otherwise, it'll get a reference on the block group pointed to by the
2147 * cluster and remove the cluster from it.
2148 */
2149int btrfs_return_cluster_to_free_space(
2150 struct btrfs_block_group_cache *block_group,
2151 struct btrfs_free_cluster *cluster)
2152{
Li Zefan34d52cb2011-03-29 13:46:06 +08002153 struct btrfs_free_space_ctl *ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002154 int ret;
2155
2156 /* first, get a safe pointer to the block group */
2157 spin_lock(&cluster->lock);
2158 if (!block_group) {
2159 block_group = cluster->block_group;
2160 if (!block_group) {
2161 spin_unlock(&cluster->lock);
2162 return 0;
2163 }
2164 } else if (cluster->block_group != block_group) {
2165 /* someone else has already freed it don't redo their work */
2166 spin_unlock(&cluster->lock);
2167 return 0;
2168 }
2169 atomic_inc(&block_group->count);
2170 spin_unlock(&cluster->lock);
2171
Li Zefan34d52cb2011-03-29 13:46:06 +08002172 ctl = block_group->free_space_ctl;
2173
Chris Masonfa9c0d792009-04-03 09:47:43 -04002174 /* now return any extents the cluster had on it */
Li Zefan34d52cb2011-03-29 13:46:06 +08002175 spin_lock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002176 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
Li Zefan34d52cb2011-03-29 13:46:06 +08002177 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002178
2179 /* finally drop our ref */
2180 btrfs_put_block_group(block_group);
2181 return ret;
2182}
2183
Josef Bacik96303082009-07-13 21:29:25 -04002184static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2185 struct btrfs_free_cluster *cluster,
Josef Bacik4e69b592011-03-21 10:11:24 -04002186 struct btrfs_free_space *entry,
Josef Bacik96303082009-07-13 21:29:25 -04002187 u64 bytes, u64 min_start)
2188{
Li Zefan34d52cb2011-03-29 13:46:06 +08002189 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik96303082009-07-13 21:29:25 -04002190 int err;
2191 u64 search_start = cluster->window_start;
2192 u64 search_bytes = bytes;
2193 u64 ret = 0;
2194
Josef Bacik96303082009-07-13 21:29:25 -04002195 search_start = min_start;
2196 search_bytes = bytes;
2197
Li Zefan34d52cb2011-03-29 13:46:06 +08002198 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
Josef Bacik96303082009-07-13 21:29:25 -04002199 if (err)
Josef Bacik4e69b592011-03-21 10:11:24 -04002200 return 0;
Josef Bacik96303082009-07-13 21:29:25 -04002201
2202 ret = search_start;
Miao Xiebb3ac5a2011-08-05 09:32:35 +00002203 __bitmap_clear_bits(ctl, entry, ret, bytes);
Josef Bacik96303082009-07-13 21:29:25 -04002204
2205 return ret;
2206}
2207
/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 *
 * Locking: the cluster's own entries are walked under cluster->lock; the
 * ctl accounting (and freeing of an exhausted entry) happens afterwards
 * under ctl->tree_lock.  'entry' is safe to touch after dropping
 * cluster->lock because a zero-byte entry was already erased from the
 * cluster tree and is only reachable from here.
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	spin_lock(&cluster->lock);
	/* max_size is the largest contiguous run the cluster holds */
	if (bytes > cluster->max_size)
		goto out;

	/* cluster was reassigned or freed behind our back */
	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while(1) {
		/* skip entries that are too small or start before min_start */
		if (entry->bytes < bytes ||
		    (!entry->bitmap && entry->offset < min_start)) {
			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}

		if (entry->bitmap) {
			ret = btrfs_alloc_from_bitmap(block_group,
						      cluster, entry, bytes,
						      cluster->window_start);
			if (ret == 0) {
				/* bitmap had no suitable run; try the next */
				node = rb_next(&entry->offset_index);
				if (!node)
					break;
				entry = rb_entry(node, struct btrfs_free_space,
						 offset_index);
				continue;
			}
			cluster->window_start += bytes;
		} else {
			/* plain extent: carve off the front */
			ret = entry->offset;

			entry->offset += bytes;
			entry->bytes -= bytes;
		}

		/* exhausted entries leave the cluster tree */
		if (entry->bytes == 0)
			rb_erase(&entry->offset_index, &cluster->root);
		break;
	}
out:
	spin_unlock(&cluster->lock);

	if (!ret)
		return 0;

	spin_lock(&ctl->tree_lock);

	/* the allocated bytes no longer count as free space */
	ctl->free_space -= bytes;
	if (entry->bytes == 0) {
		ctl->free_extents--;
		if (entry->bitmap) {
			kfree(entry->bitmap);
			ctl->total_bitmaps--;
			ctl->op->recalc_thresholds(ctl);
		}
		kmem_cache_free(btrfs_free_space_cachep, entry);
	}

	spin_unlock(&ctl->tree_lock);

	return ret;
}
2292
/*
 * Try to build a cluster out of a single bitmap entry.  Scans the bitmap
 * for runs of set (free) bits of at least min_bits, accumulating runs
 * until we have want_bits total and at least one run of cont1_bytes.
 *
 * On success the bitmap entry is moved from the ctl's free-space tree
 * into the cluster's rbtree and 0 is returned; otherwise -ENOSPC.
 * Caller must hold ctl->tree_lock and cluster->lock.
 */
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes,
				u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	unsigned long next_zero;
	unsigned long i;
	unsigned long want_bits;
	unsigned long min_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	int ret;

	/* Start scanning at offset, or at the bitmap start if it is later. */
	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	want_bits = bytes_to_bits(bytes, block_group->sectorsize);
	min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);

again:
	found_bits = 0;
	/* Find the next run of free bits that is at least min_bits long. */
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= min_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -ENOSPC;

	/* First usable run: remember the window start, reset max_size. */
	if (!total_found) {
		start = i;
		cluster->max_size = 0;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	/* Keep accumulating runs until both size goals are met. */
	if (total_found < want_bits || cluster->max_size < cont1_bytes) {
		i = next_zero + 1;
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	/* Move the bitmap from the free-space tree into the cluster. */
	rb_erase(&entry->offset_index, &ctl->free_space_offset);
	ret = tree_insert_offset(&cluster->root, entry->offset,
				 &entry->offset_index, 1);
	BUG_ON(ret); /* -EEXIST; Logic error */

	trace_btrfs_setup_cluster(block_group, cluster,
				  total_found * block_group->sectorsize, 1);
	return 0;
}
2357
Chris Masonfa9c0d792009-04-03 09:47:43 -04002358/*
Josef Bacik4e69b592011-03-21 10:11:24 -04002359 * This searches the block group for just extents to fill the cluster with.
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002360 * Try to find a cluster with at least bytes total bytes, at least one
2361 * extent of cont1_bytes, and other clusters of at least min_bytes.
Josef Bacik4e69b592011-03-21 10:11:24 -04002362 */
Josef Bacik3de85bb2011-05-25 13:07:37 -04002363static noinline int
2364setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2365 struct btrfs_free_cluster *cluster,
2366 struct list_head *bitmaps, u64 offset, u64 bytes,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002367 u64 cont1_bytes, u64 min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04002368{
Li Zefan34d52cb2011-03-29 13:46:06 +08002369 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik4e69b592011-03-21 10:11:24 -04002370 struct btrfs_free_space *first = NULL;
2371 struct btrfs_free_space *entry = NULL;
Josef Bacik4e69b592011-03-21 10:11:24 -04002372 struct btrfs_free_space *last;
2373 struct rb_node *node;
2374 u64 window_start;
2375 u64 window_free;
2376 u64 max_extent;
Josef Bacik3f7de032011-11-10 08:29:20 -05002377 u64 total_size = 0;
Josef Bacik4e69b592011-03-21 10:11:24 -04002378
Li Zefan34d52cb2011-03-29 13:46:06 +08002379 entry = tree_search_offset(ctl, offset, 0, 1);
Josef Bacik4e69b592011-03-21 10:11:24 -04002380 if (!entry)
2381 return -ENOSPC;
2382
2383 /*
2384 * We don't want bitmaps, so just move along until we find a normal
2385 * extent entry.
2386 */
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002387 while (entry->bitmap || entry->bytes < min_bytes) {
2388 if (entry->bitmap && list_empty(&entry->list))
Josef Bacik86d4a772011-05-25 13:03:16 -04002389 list_add_tail(&entry->list, bitmaps);
Josef Bacik4e69b592011-03-21 10:11:24 -04002390 node = rb_next(&entry->offset_index);
2391 if (!node)
2392 return -ENOSPC;
2393 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2394 }
2395
2396 window_start = entry->offset;
2397 window_free = entry->bytes;
2398 max_extent = entry->bytes;
2399 first = entry;
2400 last = entry;
Josef Bacik4e69b592011-03-21 10:11:24 -04002401
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002402 for (node = rb_next(&entry->offset_index); node;
2403 node = rb_next(&entry->offset_index)) {
Josef Bacik4e69b592011-03-21 10:11:24 -04002404 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2405
Josef Bacik86d4a772011-05-25 13:03:16 -04002406 if (entry->bitmap) {
2407 if (list_empty(&entry->list))
2408 list_add_tail(&entry->list, bitmaps);
Josef Bacik4e69b592011-03-21 10:11:24 -04002409 continue;
Josef Bacik86d4a772011-05-25 13:03:16 -04002410 }
2411
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002412 if (entry->bytes < min_bytes)
2413 continue;
2414
2415 last = entry;
2416 window_free += entry->bytes;
2417 if (entry->bytes > max_extent)
Josef Bacik4e69b592011-03-21 10:11:24 -04002418 max_extent = entry->bytes;
Josef Bacik4e69b592011-03-21 10:11:24 -04002419 }
2420
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002421 if (window_free < bytes || max_extent < cont1_bytes)
2422 return -ENOSPC;
2423
Josef Bacik4e69b592011-03-21 10:11:24 -04002424 cluster->window_start = first->offset;
2425
2426 node = &first->offset_index;
2427
2428 /*
2429 * now we've found our entries, pull them out of the free space
2430 * cache and put them into the cluster rbtree
2431 */
2432 do {
2433 int ret;
2434
2435 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2436 node = rb_next(&entry->offset_index);
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002437 if (entry->bitmap || entry->bytes < min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04002438 continue;
2439
Li Zefan34d52cb2011-03-29 13:46:06 +08002440 rb_erase(&entry->offset_index, &ctl->free_space_offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04002441 ret = tree_insert_offset(&cluster->root, entry->offset,
2442 &entry->offset_index, 0);
Josef Bacik3f7de032011-11-10 08:29:20 -05002443 total_size += entry->bytes;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002444 BUG_ON(ret); /* -EEXIST; Logic error */
Josef Bacik4e69b592011-03-21 10:11:24 -04002445 } while (node && entry != last);
2446
2447 cluster->max_size = max_extent;
Josef Bacik3f7de032011-11-10 08:29:20 -05002448 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
Josef Bacik4e69b592011-03-21 10:11:24 -04002449 return 0;
2450}
2451
2452/*
2453 * This specifically looks for bitmaps that may work in the cluster, we assume
2454 * that we have already failed to find extents that will work.
2455 */
Josef Bacik3de85bb2011-05-25 13:07:37 -04002456static noinline int
2457setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2458 struct btrfs_free_cluster *cluster,
2459 struct list_head *bitmaps, u64 offset, u64 bytes,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002460 u64 cont1_bytes, u64 min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04002461{
Li Zefan34d52cb2011-03-29 13:46:06 +08002462 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik4e69b592011-03-21 10:11:24 -04002463 struct btrfs_free_space *entry;
Josef Bacik4e69b592011-03-21 10:11:24 -04002464 int ret = -ENOSPC;
Li Zefan0f0fbf12011-11-20 07:33:38 -05002465 u64 bitmap_offset = offset_to_bitmap(ctl, offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04002466
Li Zefan34d52cb2011-03-29 13:46:06 +08002467 if (ctl->total_bitmaps == 0)
Josef Bacik4e69b592011-03-21 10:11:24 -04002468 return -ENOSPC;
2469
Josef Bacik86d4a772011-05-25 13:03:16 -04002470 /*
Li Zefan0f0fbf12011-11-20 07:33:38 -05002471 * The bitmap that covers offset won't be in the list unless offset
2472 * is just its start offset.
2473 */
2474 entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2475 if (entry->offset != bitmap_offset) {
2476 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2477 if (entry && list_empty(&entry->list))
2478 list_add(&entry->list, bitmaps);
2479 }
2480
Josef Bacik86d4a772011-05-25 13:03:16 -04002481 list_for_each_entry(entry, bitmaps, list) {
Josef Bacik357b9782012-01-26 15:01:11 -05002482 if (entry->bytes < bytes)
Josef Bacik86d4a772011-05-25 13:03:16 -04002483 continue;
2484 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002485 bytes, cont1_bytes, min_bytes);
Josef Bacik86d4a772011-05-25 13:03:16 -04002486 if (!ret)
2487 return 0;
2488 }
2489
2490 /*
Li Zefan52621cb2011-11-20 07:33:38 -05002491 * The bitmaps list has all the bitmaps that record free space
2492 * starting after offset, so no more search is required.
Josef Bacik86d4a772011-05-25 13:03:16 -04002493 */
Li Zefan52621cb2011-11-20 07:33:38 -05002494 return -ENOSPC;
Josef Bacik4e69b592011-03-21 10:11:24 -04002495}
2496
2497/*
Chris Masonfa9c0d792009-04-03 09:47:43 -04002498 * here we try to find a cluster of blocks in a block group. The goal
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002499 * is to find at least bytes+empty_size.
Chris Masonfa9c0d792009-04-03 09:47:43 -04002500 * We might not find them all in one contiguous area.
2501 *
2502 * returns zero and sets up cluster if things worked out, otherwise
2503 * it returns -enospc
2504 */
2505int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
Chris Mason451d7582009-06-09 20:28:34 -04002506 struct btrfs_root *root,
Chris Masonfa9c0d792009-04-03 09:47:43 -04002507 struct btrfs_block_group_cache *block_group,
2508 struct btrfs_free_cluster *cluster,
2509 u64 offset, u64 bytes, u64 empty_size)
2510{
Li Zefan34d52cb2011-03-29 13:46:06 +08002511 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik86d4a772011-05-25 13:03:16 -04002512 struct btrfs_free_space *entry, *tmp;
Li Zefan52621cb2011-11-20 07:33:38 -05002513 LIST_HEAD(bitmaps);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002514 u64 min_bytes;
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002515 u64 cont1_bytes;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002516 int ret;
2517
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002518 /*
2519 * Choose the minimum extent size we'll require for this
2520 * cluster. For SSD_SPREAD, don't allow any fragmentation.
2521 * For metadata, allow allocates with smaller extents. For
2522 * data, keep it dense.
2523 */
Chris Mason451d7582009-06-09 20:28:34 -04002524 if (btrfs_test_opt(root, SSD_SPREAD)) {
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002525 cont1_bytes = min_bytes = bytes + empty_size;
Chris Mason451d7582009-06-09 20:28:34 -04002526 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002527 cont1_bytes = bytes;
2528 min_bytes = block_group->sectorsize;
2529 } else {
2530 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
2531 min_bytes = block_group->sectorsize;
2532 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04002533
Li Zefan34d52cb2011-03-29 13:46:06 +08002534 spin_lock(&ctl->tree_lock);
Josef Bacik7d0d2e82011-03-18 15:13:42 -04002535
2536 /*
2537 * If we know we don't have enough space to make a cluster don't even
2538 * bother doing all the work to try and find one.
2539 */
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002540 if (ctl->free_space < bytes) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002541 spin_unlock(&ctl->tree_lock);
Josef Bacik7d0d2e82011-03-18 15:13:42 -04002542 return -ENOSPC;
2543 }
2544
Chris Masonfa9c0d792009-04-03 09:47:43 -04002545 spin_lock(&cluster->lock);
2546
2547 /* someone already found a cluster, hooray */
2548 if (cluster->block_group) {
2549 ret = 0;
2550 goto out;
2551 }
Josef Bacik4e69b592011-03-21 10:11:24 -04002552
Josef Bacik3f7de032011-11-10 08:29:20 -05002553 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
2554 min_bytes);
2555
2556 INIT_LIST_HEAD(&bitmaps);
Josef Bacik86d4a772011-05-25 13:03:16 -04002557 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002558 bytes + empty_size,
2559 cont1_bytes, min_bytes);
Josef Bacik4e69b592011-03-21 10:11:24 -04002560 if (ret)
Josef Bacik86d4a772011-05-25 13:03:16 -04002561 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03002562 offset, bytes + empty_size,
2563 cont1_bytes, min_bytes);
Josef Bacik86d4a772011-05-25 13:03:16 -04002564
2565 /* Clear our temporary list */
2566 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2567 list_del_init(&entry->list);
Josef Bacik4e69b592011-03-21 10:11:24 -04002568
2569 if (!ret) {
2570 atomic_inc(&block_group->count);
2571 list_add_tail(&cluster->block_group_list,
2572 &block_group->cluster_list);
2573 cluster->block_group = block_group;
Josef Bacik3f7de032011-11-10 08:29:20 -05002574 } else {
2575 trace_btrfs_failed_cluster_setup(block_group);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002576 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04002577out:
2578 spin_unlock(&cluster->lock);
Li Zefan34d52cb2011-03-29 13:46:06 +08002579 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002580
2581 return ret;
2582}
2583
2584/*
2585 * simple code to zero out a cluster
2586 */
2587void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2588{
2589 spin_lock_init(&cluster->lock);
2590 spin_lock_init(&cluster->refill_lock);
Eric Paris6bef4d32010-02-23 19:43:04 +00002591 cluster->root = RB_ROOT;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002592 cluster->max_size = 0;
2593 INIT_LIST_HEAD(&cluster->block_group_list);
2594 cluster->block_group = NULL;
2595}
2596
/*
 * Discard the range [start, start + bytes) and credit the trimmed amount
 * to *total_trimmed.  The larger range [reserved_start, reserved_start +
 * reserved_bytes) was removed from the free-space cache by the caller; it
 * is temporarily accounted as reserved (unless the group is read-only)
 * and re-added to the free-space cache afterwards.
 *
 * Returns the result of the discard; 0 on success.
 */
static int do_trimming(struct btrfs_block_group_cache *block_group,
		       u64 *total_trimmed, u64 start, u64 bytes,
		       u64 reserved_start, u64 reserved_bytes)
{
	struct btrfs_space_info *space_info = block_group->space_info;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	int ret;
	int update = 0;
	u64 trimmed = 0;

	/* Reserve the range so allocators don't hand it out mid-discard. */
	spin_lock(&space_info->lock);
	spin_lock(&block_group->lock);
	if (!block_group->ro) {
		block_group->reserved += reserved_bytes;
		space_info->bytes_reserved += reserved_bytes;
		update = 1;
	}
	spin_unlock(&block_group->lock);
	spin_unlock(&space_info->lock);

	ret = btrfs_error_discard_extent(fs_info->extent_root,
					 start, bytes, &trimmed);
	if (!ret)
		*total_trimmed += trimmed;

	/* Return the whole reserved range to the free-space cache. */
	btrfs_add_free_space(block_group, reserved_start, reserved_bytes);

	if (update) {
		spin_lock(&space_info->lock);
		spin_lock(&block_group->lock);
		/* Group may have gone read-only while we were discarding. */
		if (block_group->ro)
			space_info->bytes_readonly += reserved_bytes;
		block_group->reserved -= reserved_bytes;
		space_info->bytes_reserved -= reserved_bytes;
		spin_unlock(&space_info->lock);
		spin_unlock(&block_group->lock);
	}

	return ret;
}
2637
/*
 * Trim the extent (non-bitmap) free-space entries overlapping
 * [start, end).  Runs of at least @minlen bytes are unlinked from the
 * cache, discarded via do_trimming() and then re-added.
 *
 * ctl->tree_lock is dropped around each discard, so the tree is
 * re-searched from @start on every iteration.
 */
static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
			  u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = 0;
	u64 extent_start;
	u64 extent_bytes;
	u64 bytes;

	while (start < end) {
		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		/* skip bitmaps */
		while (entry->bitmap) {
			node = rb_next(&entry->offset_index);
			if (!node) {
				spin_unlock(&ctl->tree_lock);
				goto out;
			}
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
		}

		if (entry->offset >= end) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		/* Clamp the trimmed portion to [start, end). */
		extent_start = entry->offset;
		extent_bytes = entry->bytes;
		start = max(start, extent_start);
		bytes = min(extent_start + extent_bytes, end) - start;
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		/*
		 * Remove the whole extent from the cache; do_trimming()
		 * re-adds it after discarding the clamped portion.
		 */
		unlink_free_space(ctl, entry);
		kmem_cache_free(btrfs_free_space_cachep, entry);

		spin_unlock(&ctl->tree_lock);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  extent_start, extent_bytes);
		if (ret)
			break;
next:
		start += bytes;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}
out:
	return ret;
}
2710
/*
 * Trim the bitmap free-space entries overlapping [start, end), one
 * bitmap at a time.  Each free run of at least @minlen bytes is cleared
 * from the bitmap, discarded via do_trimming() and then re-added.
 *
 * Like trim_no_bitmap(), ctl->tree_lock is dropped around each discard.
 */
static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
			u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	int ret = 0;
	int ret2;
	u64 bytes;
	u64 offset = offset_to_bitmap(ctl, start);

	while (offset < end) {
		bool next_bitmap = false;

		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, offset, 1, 0);
		if (!entry) {
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		/* Find the next free run of at least minlen from start. */
		bytes = minlen;
		ret2 = search_bitmap(ctl, entry, &start, &bytes);
		if (ret2 || start >= end) {
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		bytes = min(bytes, end - start);
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		bitmap_clear_bits(ctl, entry, start, bytes);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);

		spin_unlock(&ctl->tree_lock);

		/* For bitmaps the reserved range equals the trimmed range. */
		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  start, bytes);
		if (ret)
			break;
next:
		if (next_bitmap) {
			offset += BITS_PER_BITMAP * ctl->unit;
		} else {
			start += bytes;
			/* Advance to the next bitmap once this one is done. */
			if (start >= offset + BITS_PER_BITMAP * ctl->unit)
				offset += BITS_PER_BITMAP * ctl->unit;
		}

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}
2781
2782int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2783 u64 *trimmed, u64 start, u64 end, u64 minlen)
2784{
2785 int ret;
2786
2787 *trimmed = 0;
2788
2789 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
2790 if (ret)
2791 return ret;
2792
2793 ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
Li Dongyangf7039b12011-03-24 10:24:28 +00002794
2795 return ret;
2796}
Li Zefan581bb052011-04-20 10:06:11 +08002797
/*
 * Find the left-most item in the cache tree, and then return the
 * smallest inode number in the item.
 *
 * Note: the returned inode number may not be the smallest one in
 * the tree, if the left-most item is a bitmap.
 *
 * Returns 0 if the free-ino cache is empty.  The returned number is
 * removed from the cache.
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 ino = 0;

	spin_lock(&ctl->tree_lock);

	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
		goto out;

	entry = rb_entry(rb_first(&ctl->free_space_offset),
			 struct btrfs_free_space, offset_index);

	if (!entry->bitmap) {
		/* Extent entry: take its first number and shrink it by one. */
		ino = entry->offset;

		unlink_free_space(ctl, entry);
		entry->offset++;
		entry->bytes--;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	} else {
		/* Bitmap entry: pick the first set bit and clear it. */
		u64 offset = 0;
		u64 count = 1;
		int ret;

		ret = search_bitmap(ctl, entry, &offset, &count);
		/* Logic error; Should be empty if it can't find anything */
		BUG_ON(ret);

		ino = offset;
		bitmap_clear_bits(ctl, entry, offset, 1);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);
	}
out:
	spin_unlock(&ctl->tree_lock);

	return ino;
}
Li Zefan82d59022011-04-20 10:33:24 +08002848
/*
 * Return the free-ino cache inode for @root, taking a reference for the
 * caller (drop with iput()).  Uses the cached root->cache_inode when
 * available; otherwise looks it up on disk and caches it, unless the
 * filesystem is shutting down.  May return an ERR_PTR from the lookup.
 */
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&root->cache_lock);
	if (root->cache_inode)
		inode = igrab(root->cache_inode);
	spin_unlock(&root->cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->cache_lock);
	/* Don't cache the inode during unmount; it would pin it. */
	if (!btrfs_fs_closing(root->fs_info))
		root->cache_inode = igrab(inode);
	spin_unlock(&root->cache_lock);

	return inode;
}
2872
2873int create_free_ino_inode(struct btrfs_root *root,
2874 struct btrfs_trans_handle *trans,
2875 struct btrfs_path *path)
2876{
2877 return __create_free_space_inode(root, trans, path,
2878 BTRFS_FREE_INO_OBJECTID, 0);
2879}
2880
/*
 * Load @root's free-ino cache from its cache inode into root->free_ino_ctl.
 *
 * Returns the result of __load_free_space_cache(), or 0 when the cache is
 * skipped (caching disabled, filesystem closing, allocation failure,
 * missing inode or stale generation).
 */
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	/* A stale generation means the cache doesn't match this root. */
	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to load free ino cache for "
		       "root %llu\n", root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}
2921
/*
 * Write @root's in-memory free-ino cache out to its cache inode.
 *
 * Returns the result of __btrfs_write_out_cache(), or 0 when there is
 * nothing to do (caching disabled or no cache inode).  On failure the
 * delalloc metadata reservation for the inode is released.
 */
int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct inode *inode;
	int ret;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
	if (ret) {
		btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free ino cache "
		       "for root %llu\n", root->root_key.objectid);
#endif
	}

	iput(inode);
	return ret;
}